Skip to content

Commit

Permalink
Default LogsDB value for ignore_dynamic_beyond_limit (elastic#115265)
Browse files Browse the repository at this point in the history
When ingesting logs, it's important to ensure that documents are not dropped due to mapping issues, including when dealing with dynamically mapped fields. Elasticsearch provides two key settings that limit the total number of field mappings and control what happens when that limit would be exceeded:

1. **`index.mapping.total_fields.limit`**: This setting defines the maximum number of fields allowed in an index. If this limit is reached, any further mapped fields would cause indexing to fail.

2. **`index.mapping.total_fields.ignore_dynamic_beyond_limit`**: This setting determines whether Elasticsearch should ignore any dynamically mapped fields that exceed the limit defined by `index.mapping.total_fields.limit`. If set to `false`, indexing will fail once the limit is surpassed. However, if set to `true`, Elasticsearch will continue indexing the document but will silently ignore any additional dynamically mapped fields beyond the limit.

To prevent indexing failures due to dynamic mapping issues, especially in logs where the schema might change frequently, we change the default value of **`index.mapping.total_fields.ignore_dynamic_beyond_limit` from `false` to `true` in LogsDB**. This change ensures that even when the number of dynamically mapped fields exceeds the set limit, documents will still be indexed, and additional fields will simply be ignored rather than causing an indexing failure.

This adjustment is important for LogsDB, where dynamically mapped fields may be common and we want to prevent documents from being dropped.
  • Loading branch information
salvatore-campagna authored Oct 31, 2024
1 parent aaf7a3e commit 3cbbcc5
Show file tree
Hide file tree
Showing 5 changed files with 302 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
---
object with unmapped fields:
- requires:
cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,3 +599,284 @@ end time not allowed in logs mode:
- match: { error.root_cause.0.type: "illegal_argument_exception" }
- match: { error.type: "illegal_argument_exception" }
- match: { error.reason: "[index.time_series.end_time] requires [index.mode=time_series]" }

---
# Checks the default of `index.mapping.total_fields.ignore_dynamic_beyond_limit` for an index created
# with `index.mode: logsdb` and no explicit value for that setting: it is expected to be "true".
ignore dynamic beyond limit logsdb default value:
- requires:
cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ]
reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit`

# Create the index setting only the index mode; the total_fields settings are left untouched.
- do:
indices.create:
index: test-ignore-dynamic-default
body:
settings:
index:
mode: logsdb

# `include_defaults: true` is requested because the assertions below read the values from the
# `defaults` section of the response rather than from `settings`.
- do:
indices.get_settings:
index: test-ignore-dynamic-default
include_defaults: true

- match: { test-ignore-dynamic-default.settings.index.mode: "logsdb" }
- match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.limit: "1000" }
- match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.ignore_dynamic_beyond_limit: "true" }

---
# Checks that an explicit `index.mapping.total_fields.ignore_dynamic_beyond_limit: false` on a
# `logsdb` index is kept as set, i.e. the logsdb default does not replace a user-provided value.
ignore dynamic beyond limit logsdb override value:
- requires:
cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ]
reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit`

# Create the index with the setting explicitly set to `false`.
- do:
indices.create:
index: test-ignore-dynamic-override
body:
settings:
index:
mode: logsdb
mapping:
total_fields:
ignore_dynamic_beyond_limit: false

# No `include_defaults` here: the value was set explicitly, so it is asserted under `settings`.
- do:
indices.get_settings:
index: test-ignore-dynamic-override

- match: { test-ignore-dynamic-override.settings.index.mode: "logsdb" }
- match: { test-ignore-dynamic-override.settings.index.mapping.total_fields.ignore_dynamic_beyond_limit: "false" }

---
logsdb with default ignore dynamic beyond limit and default sorting:
- requires:
cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"]
reason: requires default value for ignore_dynamic_beyond_limit

- do:
indices.create:
index: test-logsdb-default-sort
body:
settings:
index:
mode: logsdb
mapping:
# NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if
# sort settings are not overridden.
# With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name`
# keyword field (`host.name`).
#
# As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name`
# and `name`). We cannot use a field limit lower than 4 because these fields are always present.
#
# Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically
# mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically
# mapped fields are always counted.
total_fields:
limit: 4
mappings:
properties:
"@timestamp":
type: date
name:
type: keyword

- do:
indices.get_settings:
index: test-logsdb-default-sort

- match: { test-logsdb-default-sort.settings.index.mode: "logsdb" }

- do:
bulk:
index: test-logsdb-default-sort
refresh: true
body:
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }'
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }'
- match: { errors: false }

- do:
search:
index: test-logsdb-default-sort
body:
query:
match_all: {}

- match: { hits.total.value: 2 }
- match: { hits.hits.0._source.name: "bar" }
- match: { hits.hits.0._source.value: 20 }
- match: { hits.hits.0._source.message: "jumps over the lazy dog" }
- match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] }
- match: { hits.hits.1._source.name: "foo" }
- match: { hits.hits.1._source.value: 10 }
- match: { hits.hits.1._source.message: "the quick brown fox" }
- match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] }

---
logsdb with default ignore dynamic beyond limit and non-default sorting:
- requires:
cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"]
reason: requires default value for ignore_dynamic_beyond_limit

- do:
indices.create:
index: test-logsdb-non-default-sort
body:
settings:
index:
sort.field: [ "name" ]
sort.order: [ "desc" ]
mode: logsdb
mapping:
# NOTE: Here sort settings are overridden and we do not have any additional statically mapped fields other
# than `name` and `@timestamp`. As a result, there are only 2 statically mapped fields.
total_fields:
limit: 2
mappings:
properties:
"@timestamp":
type: date
name:
type: keyword

- do:
indices.get_settings:
index: test-logsdb-non-default-sort

- match: { test-logsdb-non-default-sort.settings.index.mode: "logsdb" }

- do:
bulk:
index: test-logsdb-non-default-sort
refresh: true
body:
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }'
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }'
- match: { errors: false }

- do:
search:
index: test-logsdb-non-default-sort
body:
query:
match_all: {}

- match: { hits.total.value: 2 }
- match: { hits.hits.0._source.name: "foo" }
- match: { hits.hits.0._source.value: 10 }
- match: { hits.hits.0._source.message: "the quick brown fox" }
- match: { hits.hits.0._ignored: [ "host", "message", "pid", "region", "value" ] }
- match: { hits.hits.1._source.name: "bar" }
- match: { hits.hits.1._source.value: 20 }
- match: { hits.hits.1._source.message: "jumps over the lazy dog" }
- match: { hits.hits.1._ignored: [ "host", "message", "pid", "region", "value" ] }

---
# Checks that creating a `logsdb` index is rejected with a `bad_request` when
# `index.mapping.total_fields.limit` (3 here) is lower than the number of statically mapped fields
# listed in the NOTE below. The test expects an `illegal_argument_exception` at index creation,
# with no documents indexed.
logsdb with default ignore dynamic beyond limit and too low limit:
- requires:
cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"]
reason: requires default value for ignore_dynamic_beyond_limit

- do:
catch: bad_request
indices.create:
index: test-logsdb-low-limit
body:
settings:
index:
mode: logsdb
mapping:
# NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if
# sort settings are not overridden.
# With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name`
# keyword field (`host.name`).
#
# As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name`
# and `name`). We cannot use a field limit lower than 4 because these fields are always present.
#
# Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically
# mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically
# mapped fields are always counted.
total_fields:
limit: 3
mappings:
properties:
"@timestamp":
type: date
name:
type: keyword
# The error is asserted on the response of the failed create call captured by `catch` above.
- match: { error.type: "illegal_argument_exception" }
- match: { error.reason: "Limit of total fields [3] has been exceeded" }

---
logsdb with default ignore dynamic beyond limit and subobjects false:
- requires:
cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"]
reason: requires default value for ignore_dynamic_beyond_limit

- do:
indices.create:
index: test-logsdb-subobjects-false
body:
settings:
index:
mode: logsdb
mapping:
# NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if
# sort settings are not overridden.
# With `subobjects` set to `false`, however, a single `host.name` keyword field is automatically mapped.
#
# As a result, there are just 3 statically mapped fields (`@timestamp`, `host.name` and `name`).
# We cannot use a field limit lower than 3 because these fields are always present.
#
# Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically
# mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically
# mapped fields are always counted.
total_fields:
limit: 3
mappings:
subobjects: false
properties:
"@timestamp":
type: date
name:
type: keyword

- do:
indices.get_settings:
index: test-logsdb-subobjects-false

- match: { test-logsdb-subobjects-false.settings.index.mode: "logsdb" }

- do:
bulk:
index: test-logsdb-subobjects-false
refresh: true
body:
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }'
- '{ "index": { } }'
- '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }'
- match: { errors: false }

- do:
search:
index: test-logsdb-subobjects-false
body:
query:
match_all: {}

- match: { hits.total.value: 2 }
- match: { hits.hits.0._source.name: "bar" }
- match: { hits.hits.0._source.value: 20 }
- match: { hits.hits.0._source.message: "jumps over the lazy dog" }
- match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] }
- match: { hits.hits.1._source.name: "foo" }
- match: { hits.hits.1._source.value: 10 }
- match: { hits.hits.1._source.message: "the quick brown fox" }
- match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] }
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,9 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion UPGRADE_TO_LUCENE_9_12 = def(8_516_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion ENABLE_IGNORE_ABOVE_LOGSDB = def(8_517_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion ADD_ROLE_MAPPING_CLEANUP_MIGRATION = def(8_518_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0);

public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ public Set<NodeFeature> getTestFeatures() {
IgnoredSourceFieldMapper.DONT_EXPAND_DOTS_IN_IGNORED_SOURCE,
SourceFieldMapper.REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION,
IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD,
IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS
IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS,
MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
Expand Down Expand Up @@ -121,9 +124,21 @@ public boolean isAutoUpdate() {
Property.IndexScope,
Property.ServerlessPublic
);

public static final NodeFeature LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = new NodeFeature(
"mapper.logsdb_default_ignore_dynamic_beyond_limit"
);
public static final Setting<Boolean> INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_LIMIT_SETTING = Setting.boolSetting(
"index.mapping.total_fields.ignore_dynamic_beyond_limit",
false,
settings -> {
boolean isLogsDBIndexMode = IndexSettings.MODE.get(settings) == IndexMode.LOGSDB;
final IndexVersion indexVersionCreated = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings);
boolean isNewIndexVersion = indexVersionCreated.between(
IndexVersions.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT,
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
) || indexVersionCreated.onOrAfter(IndexVersions.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT);
return String.valueOf(isLogsDBIndexMode && isNewIndexVersion);
},
Property.Dynamic,
Property.IndexScope,
Property.ServerlessPublic
Expand Down

0 comments on commit 3cbbcc5

Please sign in to comment.