From ff0f83f59d5eaa9214a1f4a819ad031985925e51 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Tue, 6 Feb 2024 12:58:42 +0100 Subject: [PATCH] Make field limit more predictable (#102885) Today, we're counting all mappers, including mappers for subfields that aren't explicitly added to the mapping towards the field limit. This means that some field types, such as `search_as_you_type` or `percolator` count as more than one field even though that's not apparent to users as they're just defining them as a single field in the mapping. This change makes it so that each field mapper only counts as one. We're still counting multi-fields. This makes it easier to understand for users why the field limit is hit. ~In addition to that, it also simplifies https://github.com/elastic/elasticsearch/pull/96235 as it makes the implementation of `Mapper.Builder#getTotalFieldsCount` much easier and easier to align with `Mapper#getTotalFieldsCount`. This reduces the risk of over- or under-estimating the field count of a `Mapper.Builder` in `DocumentParserContext#addDynamicMapper`, which in turn reduces the risk of data loss due to the issue described here: https://github.com/elastic/elasticsearch/pull/96235#discussion_r1402495749.~ *Edit: due to https://github.com/elastic/elasticsearch/pull/103865, we don't need an implementation of `getTotalFieldsCount` or `mapperSize` in `Mapper.Builder`. Still, this PR more closely aligns `Mapper#getTotalFieldsCount` with `MappingLookup#getTotalFieldsCount`, which `DocumentParserContext#addDynamicMapper` uses to determine whether the field limit is hit* A potential risk of this is that we're now effectively allowing more fields in the mapping. It may be surprising to users that more fields can be added to a mapping. Although, I'd not expect negative consequences from that. Generally, I'd expect users to be happy about any change that reduces the risk of data loss. We could also think about whether to apply the new counting logic only to new indices (depending on the `IndexVersion`). However, that would add more complexity and I'm not convinced about the value. We'd then need to maintain two different ways of counting fields and also require passing in the `IndexVersion` to `MappingLookup` which previously didn't require the `IndexVersion`. This PR is meant as a conversation starter. It would also simplify https://github.com/elastic/elasticsearch/pull/96235 but I don't think this blocks that PR in any way. I'm curious about the opinion of @javanna and @jpountz on this. --- docs/changelog/102885.yaml | 5 +++ .../LegacyGeoShapeFieldMapperTests.java | 6 +++ .../org/elasticsearch/index/IndexService.java | 2 +- .../index/mapper/DocumentParserContext.java | 2 +- .../index/mapper/FieldAliasMapper.java | 5 +++ .../index/mapper/FieldMapper.java | 8 +++- .../elasticsearch/index/mapper/Mapper.java | 14 ++----- .../index/mapper/MappingLookup.java | 12 +++++- .../index/mapper/ObjectMapper.java | 9 ++++- .../index/mapper/RootObjectMapper.java | 8 +--- .../index/mapper/FieldAliasMapperTests.java | 2 +- .../index/mapper/ObjectMapperMergeTests.java | 40 +++++++++---------- .../index/mapper/ObjectMapperTests.java | 11 +++-- .../index/mapper/RootObjectMapperTests.java | 2 +- .../index/mapper/MapperTestCase.java | 5 +++ 15 files changed, 83 insertions(+), 48 deletions(-) create mode 100644 docs/changelog/102885.yaml diff --git a/docs/changelog/102885.yaml b/docs/changelog/102885.yaml new file mode 100644 index 0000000000000..7a998c3eb1f66 --- /dev/null +++ b/docs/changelog/102885.yaml @@ -0,0 +1,5 @@ +pr: 102885 +summary: Make field limit more predictable +area: Mapping +type: enhancement +issues: [] diff --git a/modules/legacy-geo/src/test/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapperTests.java b/modules/legacy-geo/src/test/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapperTests.java index 91a94fe174c21..0a0bb12bedbae 100644 --- a/modules/legacy-geo/src/test/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapperTests.java +++ b/modules/legacy-geo/src/test/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapperTests.java @@ -76,6 +76,12 @@ protected boolean supportsStoredFields() { return false; } + @Override + public void testTotalFieldsCount() throws IOException { + super.testTotalFieldsCount(); + assertWarnings("Parameter [strategy] is deprecated and will be removed in a future version"); + } + @Override protected void registerParameters(ParameterChecker checker) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index c5a5e5a5c4b96..f534d8b2dc806 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -330,7 +330,7 @@ public NodeMappingStats getNodeMappingStats() { if (mapperService == null) { return null; } - long totalCount = mapperService().mappingLookup().getTotalFieldsCount(); + long totalCount = mapperService().mappingLookup().getTotalMapperCount(); long totalEstimatedOverhead = totalCount * 1024L; // 1KiB estimated per mapping NodeMappingStats indexNodeMappingStats = new NodeMappingStats(totalCount, totalEstimatedOverhead); return indexNodeMappingStats; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 0a669fb0ade8a..66c5de61bcd92 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -335,7 +335,7 @@ public final boolean addDynamicMapper(Mapper mapper) { if (mappingLookup.getMapper(mapper.name()) == null && mappingLookup.objectMappers().containsKey(mapper.name()) == false && dynamicMappers.containsKey(mapper.name()) == false) { - int mapperSize = mapper.mapperSize(); + int mapperSize = mapper.getTotalFieldsCount(); int additionalFieldsToAdd = getNewFieldsSize() + mapperSize; if (indexSettings().isIgnoreDynamicFieldsBeyondLimit()) { if (mappingLookup.exceedsLimit(indexSettings().getMappingTotalFieldsLimit(), additionalFieldsToAdd)) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java index c24ff9bb9c277..97d1b9368a6c9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldAliasMapper.java @@ -113,6 +113,11 @@ public void validate(MappingLookup mappers) { } } + @Override + public int getTotalFieldsCount() { + return 1; + } + public static class TypeParser implements Mapper.TypeParser { @Override public Mapper.Builder parse(String name, Map node, MappingParserContext parserContext) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 9ed23f61bf0ea..75d9fed2a4d4b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -51,6 +51,7 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Stream; import static org.elasticsearch.core.Strings.format; @@ -428,6 +429,11 @@ protected void doXContentBody(XContentBuilder builder, Params params) throws IOE protected abstract String contentType(); + @Override + public int getTotalFieldsCount() { + return 1 + Stream.of(multiFields.mappers).mapToInt(FieldMapper::getTotalFieldsCount).sum(); + } + public Map indexAnalyzers() { return Map.of(); } @@ -455,7 +461,7 @@ private void add(FieldMapper mapper) { private void update(FieldMapper toMerge, MapperMergeContext context) { if (mapperBuilders.containsKey(toMerge.simpleName()) == false) { - if (context.decrementFieldBudgetIfPossible(toMerge.mapperSize())) { + if (context.decrementFieldBudgetIfPossible(toMerge.getTotalFieldsCount())) { add(toMerge); } } else { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java index ca15248c037bc..397f99f63030c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java @@ -137,16 +137,8 @@ public static FieldType freezeAndDeduplicateFieldType(FieldType fieldType) { } /** - * Returns the size this mapper counts against the {@linkplain MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING field limit}. - *

- * Needs to be in sync with {@link MappingLookup#getTotalFieldsCount()}. + * The total number of fields as defined in the mapping. + * Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}. */ - public int mapperSize() { - int size = 1; - for (Mapper mapper : this) { - size += mapper.mapperSize(); - } - return size; - } - + public abstract int getTotalFieldsCount(); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index ea59d6640f647..0ae13241b7f56 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -55,6 +55,7 @@ private CacheKey() {} private final List indexTimeScriptMappers; private final Mapping mapping; private final Set completionFields; + private final int totalFieldsCount; /** * Creates a new {@link MappingLookup} instance by parsing the provided mapping and extracting its field definitions. @@ -127,6 +128,7 @@ private MappingLookup( Collection objectMappers, Collection aliasMappers ) { + this.totalFieldsCount = mapping.getRoot().getTotalFieldsCount(); this.mapping = mapping; Map fieldMappers = new HashMap<>(); Map objects = new HashMap<>(); @@ -223,6 +225,14 @@ FieldTypeLookup fieldTypesLookup() { * Returns the total number of fields defined in the mappings, including field mappers, object mappers as well as runtime fields. */ public long getTotalFieldsCount() { + return totalFieldsCount; + } + + /** + * Returns the total number of mappers defined in the mappings, including field mappers and their sub-fields + * (which are not explicitly defined in the mappings), multi-fields, object mappers, runtime fields and metadata field mappers. + */ + public long getTotalMapperCount() { return fieldMappers.size() + objectMappers.size() + runtimeFieldMappersCount; } @@ -286,7 +296,7 @@ boolean exceedsLimit(long limit, int additionalFieldsToAdd) { } long remainingFieldsUntilLimit(long mappingTotalFieldsLimit) { - return mappingTotalFieldsLimit - getTotalFieldsCount() + mapping.getSortedMetadataMappers().length; + return mappingTotalFieldsLimit - totalFieldsCount; } private void checkDimensionFieldLimit(long limit) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index 9d7353859ed25..0bce02564ef34 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -182,6 +182,11 @@ public ObjectMapper build(MapperBuilderContext context) { } } + @Override + public int getTotalFieldsCount() { + return 1 + mappers.values().stream().mapToInt(Mapper::getTotalFieldsCount).sum(); + } + public static class TypeParser implements Mapper.TypeParser { @Override public boolean supportsVersion(IndexVersion indexCreatedVersion) { @@ -547,7 +552,7 @@ private static Map buildMergedMappers( Mapper mergeIntoMapper = mergedMappers.get(mergeWithMapper.simpleName()); Mapper merged = null; if (mergeIntoMapper == null) { - if (objectMergeContext.decrementFieldBudgetIfPossible(mergeWithMapper.mapperSize())) { + if (objectMergeContext.decrementFieldBudgetIfPossible(mergeWithMapper.getTotalFieldsCount())) { merged = mergeWithMapper; } else if (mergeWithMapper instanceof ObjectMapper om) { merged = truncateObjectMapper(reason, objectMergeContext, om); @@ -581,7 +586,7 @@ private static ObjectMapper truncateObjectMapper(MergeReason reason, MapperMerge // there's not enough capacity for the whole object mapper, // so we're just trying to add the shallow object, without it's sub-fields ObjectMapper shallowObjectMapper = objectMapper.withoutMappers(); - if (context.decrementFieldBudgetIfPossible(shallowObjectMapper.mapperSize())) { + if (context.decrementFieldBudgetIfPossible(shallowObjectMapper.getTotalFieldsCount())) { // now trying to add the sub-fields one by one via a merge, until we hit the limit return shallowObjectMapper.merge(objectMapper, reason, context); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java index 7994c018f40f2..2fe8c49df2175 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java @@ -576,11 +576,7 @@ private static boolean processField( } @Override - public int mapperSize() { - int size = runtimeFields().size(); - for (Mapper mapper : this) { - size += mapper.mapperSize(); - } - return size; + public int getTotalFieldsCount() { + return mappers.values().stream().mapToInt(Mapper::getTotalFieldsCount).sum() + runtimeFields.size(); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperTests.java index f816f403be89f..c20091a308ed8 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperTests.java @@ -34,7 +34,7 @@ public void testParsing() throws IOException { ); DocumentMapper mapper = createDocumentMapper(mapping); assertEquals(mapping, mapper.mappingSource().toString()); - assertEquals(2, mapper.mapping().getRoot().mapperSize()); + assertEquals(2, mapper.mapping().getRoot().getTotalFieldsCount()); } public void testParsingWithMissingPath() { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperMergeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperMergeTests.java index 0737dcb7cb5d2..005b14886d059 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperMergeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperMergeTests.java @@ -213,10 +213,10 @@ public void testMergeWithLimit() { final ObjectMapper mergedAdd1 = rootObjectMapper.merge(mergeWith, MapperMergeContext.root(false, false, 1)); // THEN "baz" new field is added to merged mapping - assertEquals(3, rootObjectMapper.mapperSize()); - assertEquals(4, mergeWith.mapperSize()); - assertEquals(3, mergedAdd0.mapperSize()); - assertEquals(4, mergedAdd1.mapperSize()); + assertEquals(3, rootObjectMapper.getTotalFieldsCount()); + assertEquals(4, mergeWith.getTotalFieldsCount()); + assertEquals(3, mergedAdd0.getTotalFieldsCount()); + assertEquals(4, mergedAdd1.getTotalFieldsCount()); } public void testMergeWithLimitTruncatedObjectField() { @@ -231,11 +231,11 @@ public void testMergeWithLimitTruncatedObjectField() { ObjectMapper mergedAdd1 = root.merge(mergeWith, MapperMergeContext.root(false, false, 1)); ObjectMapper mergedAdd2 = root.merge(mergeWith, MapperMergeContext.root(false, false, 2)); ObjectMapper mergedAdd3 = root.merge(mergeWith, MapperMergeContext.root(false, false, 3)); - assertEquals(0, root.mapperSize()); - assertEquals(0, mergedAdd0.mapperSize()); - assertEquals(1, mergedAdd1.mapperSize()); - assertEquals(2, mergedAdd2.mapperSize()); - assertEquals(3, mergedAdd3.mapperSize()); + assertEquals(0, root.getTotalFieldsCount()); + assertEquals(0, mergedAdd0.getTotalFieldsCount()); + assertEquals(1, mergedAdd1.getTotalFieldsCount()); + assertEquals(2, mergedAdd2.getTotalFieldsCount()); + assertEquals(3, mergedAdd3.getTotalFieldsCount()); ObjectMapper parent1 = (ObjectMapper) mergedAdd1.getMapper("parent"); assertNull(parent1.getMapper("child1")); @@ -262,9 +262,9 @@ public void testMergeSameObjectDifferentFields() { ObjectMapper mergedAdd0 = root.merge(mergeWith, MapperMergeContext.root(false, false, 0)); ObjectMapper mergedAdd1 = root.merge(mergeWith, MapperMergeContext.root(false, false, 1)); - assertEquals(2, root.mapperSize()); - assertEquals(2, mergedAdd0.mapperSize()); - assertEquals(3, mergedAdd1.mapperSize()); + assertEquals(2, root.getTotalFieldsCount()); + assertEquals(2, mergedAdd0.getTotalFieldsCount()); + assertEquals(3, mergedAdd1.getTotalFieldsCount()); ObjectMapper parent0 = (ObjectMapper) mergedAdd0.getMapper("parent"); assertNotNull(parent0.getMapper("child1")); @@ -285,13 +285,13 @@ public void testMergeWithLimitMultiField() { createTextKeywordMultiField("text", "keyword2") ).build(MapperBuilderContext.root(false, false)); - assertEquals(2, mergeInto.mapperSize()); - assertEquals(2, mergeWith.mapperSize()); + assertEquals(2, mergeInto.getTotalFieldsCount()); + assertEquals(2, mergeWith.getTotalFieldsCount()); ObjectMapper mergedAdd0 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 0)); ObjectMapper mergedAdd1 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 1)); - assertEquals(2, mergedAdd0.mapperSize()); - assertEquals(3, mergedAdd1.mapperSize()); + assertEquals(2, mergedAdd0.getTotalFieldsCount()); + assertEquals(3, mergedAdd1.getTotalFieldsCount()); } public void testMergeWithLimitRuntimeField() { @@ -302,13 +302,13 @@ public void testMergeWithLimitRuntimeField() { new TestRuntimeField("existing_runtime_field", "keyword") ).addRuntimeField(new TestRuntimeField("new_runtime_field", "keyword")).build(MapperBuilderContext.root(false, false)); - assertEquals(3, mergeInto.mapperSize()); - assertEquals(2, mergeWith.mapperSize()); + assertEquals(3, mergeInto.getTotalFieldsCount()); + assertEquals(2, mergeWith.getTotalFieldsCount()); ObjectMapper mergedAdd0 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 0)); ObjectMapper mergedAdd1 = mergeInto.merge(mergeWith, MapperMergeContext.root(false, false, 1)); - assertEquals(3, mergedAdd0.mapperSize()); - assertEquals(4, mergedAdd1.mapperSize()); + assertEquals(3, mergedAdd0.getTotalFieldsCount()); + assertEquals(4, mergedAdd1.getTotalFieldsCount()); } private static RootObjectMapper createRootSubobjectFalseLeafWithDots() { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java index cbb0929b813fc..29e5f8540734b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java @@ -530,15 +530,20 @@ public void testSyntheticSourceDocValuesFieldWithout() throws IOException { assertThat(mapper.mapping().getRoot().syntheticFieldLoader().docValuesLoader(null, null), nullValue()); } - public void testNestedObjectWithMultiFieldsMapperSize() throws IOException { + public void testNestedObjectWithMultiFieldsgetTotalFieldsCount() { ObjectMapper.Builder mapperBuilder = new ObjectMapper.Builder("parent_size_1", Explicit.IMPLICIT_TRUE).add( new ObjectMapper.Builder("child_size_2", Explicit.IMPLICIT_TRUE).add( new TextFieldMapper.Builder("grand_child_size_3", createDefaultIndexAnalyzers()).addMultiField( new KeywordFieldMapper.Builder("multi_field_size_4", IndexVersion.current()) - ).addMultiField(new KeywordFieldMapper.Builder("multi_field_size_5", IndexVersion.current())) + ) + .addMultiField( + new TextFieldMapper.Builder("grand_child_size_5", createDefaultIndexAnalyzers()).addMultiField( + new KeywordFieldMapper.Builder("multi_field_of_multi_field_size_6", IndexVersion.current()) + ) + ) ) ); - assertThat(mapperBuilder.build(MapperBuilderContext.root(false, false)).mapperSize(), equalTo(5)); + assertThat(mapperBuilder.build(MapperBuilderContext.root(false, false)).getTotalFieldsCount(), equalTo(6)); } public void testWithoutMappers() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/RootObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/RootObjectMapperTests.java index 662a809e6d065..b616aa70dafde 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/RootObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/RootObjectMapperTests.java @@ -166,7 +166,7 @@ public void testRuntimeSection() throws IOException { })); MapperService mapperService = createMapperService(mapping); assertEquals(mapping, mapperService.documentMapper().mappingSource().toString()); - assertEquals(3, mapperService.documentMapper().mapping().getRoot().mapperSize()); + assertEquals(3, mapperService.documentMapper().mapping().getRoot().getTotalFieldsCount()); } public void testRuntimeSectionRejectedUpdate() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 05aee30799de2..43ac8057a3fc0 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -427,6 +427,11 @@ public final void testMinimalToMaximal() throws IOException { assertParseMaximalWarnings(); } + public void testTotalFieldsCount() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); + assertEquals(1, mapperService.documentMapper().mapping().getRoot().getTotalFieldsCount()); + } + protected final void assertParseMinimalWarnings() { String[] warnings = getParseMinimalWarnings(); if (warnings.length > 0) {