From c869525c4fb4e91b405762d62c2083246ef42096 Mon Sep 17 00:00:00 2001 From: Michail Romaios <84708293+mromaios@users.noreply.github.com> Date: Tue, 26 Aug 2025 20:18:43 +0200 Subject: [PATCH] refactor(semantic_text): fail early in pre-8.11 indices (#133080) * fix(semantic_text): index underlying dense_vector field in older indices * Update docs/changelog/133080.yaml * [CI] Auto commit changes from spotless * update msg, change versions, add ut * indent * remove todo * [CI] Auto commit changes from spotless * update msg * [CI] Auto commit changes from spotless * add densevectormapper indexed ut * [CI] Auto commit changes from spotless * fix ut * undo accidental ut removal * [CI] Auto commit changes from spotless * Update docs/changelog/133080.yaml Co-authored-by: Kathleen DeRusso * remove sparse_vector exception change * [CI] Auto commit changes from spotless * reverting most of the stuff * removing unused import, rename test * [CI] Auto commit changes from spotless * syntax * revisit changelog * revisit changelog * Update docs/changelog/133080.yaml Co-authored-by: Kathleen DeRusso * Update x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java Co-authored-by: Kathleen DeRusso * changelog format * fix UT --------- Co-authored-by: elasticsearchmachine Co-authored-by: Kathleen DeRusso --- docs/changelog/133080.yaml | 5 ++ .../vectors/DenseVectorFieldMapperTests.java | 3 +- .../mapper/SemanticTextFieldMapper.java | 5 ++ .../mapper/SemanticTextFieldMapperTests.java | 52 +++++++++++++++++++ 4 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/133080.yaml diff --git a/docs/changelog/133080.yaml b/docs/changelog/133080.yaml new file mode 100644 index 0000000000000..78b6f579f5959 --- /dev/null +++ b/docs/changelog/133080.yaml @@ -0,0 +1,5 @@ +pr: 133080 +summary: "Disallow creating `semantic_text` fields in indices created prior to 8.11.0" +area: Relevance +type: bug +issues: [] diff --git 
a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 8f17dfa8fd56e..b9dcb88189bec 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -68,6 +68,7 @@ import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -107,7 +108,7 @@ private void indexMapping(XContentBuilder b, IndexVersion indexVersion) throws I if (elementType != ElementType.FLOAT) { b.field("element_type", elementType.toString()); } - if (indexVersion.onOrAfter(DenseVectorFieldMapper.INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { + if (indexVersion.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION) || indexed) { // Serialize if it's new index version, or it was not the default for previous indices b.field("index", indexed); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 8f111bcb2c785..be0349c11f402 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -99,6 +99,7 @@ import java.util.function.Function; import java.util.function.Supplier; +import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ; import static org.elasticsearch.index.IndexVersions.SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X; import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING; @@ -124,6 +125,7 @@ */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class); + public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher."; public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); @@ -156,6 +158,9 @@ public static final TypeParser parser(Supplier modelRegistry) { public static BiConsumer validateParserContext(String type) { return (n, c) -> { + if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) { + throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE); + } if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) { notInMultiFields(type).accept(n, c); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 47d839b515163..4ff4c79be7d28 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java
@@ -107,6 +107,7 @@
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD;
+import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan;
 import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
@@ -402,6 +403,57 @@ public void testInvalidTaskTypes() {
         }
     }
 
+    @Override
+    protected IndexVersion boostNotAllowedIndexVersion() {
+        return IndexVersions.NEW_SPARSE_VECTOR;
+    }
+
+    public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
+        // Dense case: explicit text_embedding model settings.
+        final XContentBuilder fieldMapping = fieldMapping(b -> {
+            b.field("type", "semantic_text");
+            b.field(INFERENCE_ID_FIELD, "test_inference_id");
+            b.startObject("model_settings");
+            b.field("task_type", "text_embedding");
+            b.field("dimensions", 384);
+            b.field("similarity", "cosine");
+            b.field("element_type", "float");
+            b.endObject();
+        });
+        assertOldIndexUnsupported(fieldMapping);
+    }
+
+    public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
+        final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
+        assertOldIndexUnsupported(fieldMapping);
+    }
+
+    public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
+        final XContentBuilder fieldMapping = fieldMapping(b -> {
+            b.field("type", "semantic_text");
+            b.field("inference_id", "another_inference_id");
+            b.startObject("model_settings");
+            b.field("task_type", "sparse_embedding");
+            b.endObject();
+        });
+        assertOldIndexUnsupported(fieldMapping);
+    }
+
+    private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
+        // Both version bounds passed below precede NEW_SPARSE_VECTOR, so creating the mapping must fail.
+        MapperParsingException exception = expectThrows(
+            MapperParsingException.class,
+            () -> createMapperService(
+                fieldMapping,
+                true,
+                IndexVersions.V_8_0_0,
+                IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
+            )
+        );
+        assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
+        assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
+    }
+
     public void testMultiFieldsSupport() throws IOException {
         if (useLegacyFormat) {
             Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {