Skip to content

Commit d29bb8a

Browse files
kderusso, mromaios, elasticsearchmachine
authored
[9.0] refactor(semantic_text): fail early in pre-8.11 indices (#133080) (#133578)
* refactor(semantic_text): fail early in pre-8.11 indices (#133080)
* fix(semantic_text): index underlying dense_vector field in older indices
* Update docs/changelog/133080.yaml
* [CI] Auto commit changes from spotless
* update msg, change versions, add ut
* indent
* remove todo
* [CI] Auto commit changes from spotless
* update msg
* [CI] Auto commit changes from spotless
* add densevectormapper indexed ut
* [CI] Auto commit changes from spotless
* fix ut
* undo accidental ut removal
* [CI] Auto commit changes from spotless
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <kathleen.derusso@elastic.co>
* remove sparse_vector exception change
* [CI] Auto commit changes from spotless
* reverting most of the stuff
* removing unused import, rename test
* [CI] Auto commit changes from spotless
* syntax
* revisit changelog
* revisit changelog
* Update docs/changelog/133080.yaml
  Co-authored-by: Kathleen DeRusso <kathleen.derusso@elastic.co>
* Update x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
  Co-authored-by: Kathleen DeRusso <kathleen.derusso@elastic.co>
* changelog format
* fix UT
---------
Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
Co-authored-by: Kathleen DeRusso <kathleen.derusso@elastic.co>
(cherry picked from commit 8f41a4b)
# Conflicts:
# server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
# x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
# x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java
* [CI] Auto commit changes from spotless
---------
Co-authored-by: Michail Romaios <84708293+mromaios@users.noreply.github.com>
Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
1 parent 1a1632f commit d29bb8a

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

docs/changelog/133080.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 133080
2+
summary: "Disallow creating `semantic_text` fields in indices created prior to 8.11.0"
3+
area: Relevance
4+
type: bug
5+
issues: []

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
package org.elasticsearch.xpack.inference.mapper;
99

10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
1012
import org.apache.lucene.index.FieldInfos;
1113
import org.apache.lucene.index.LeafReaderContext;
1214
import org.apache.lucene.search.DocIdSetIterator;
@@ -89,6 +91,7 @@
8991
import java.util.function.BiConsumer;
9092
import java.util.function.Function;
9193

94+
import static org.elasticsearch.index.IndexVersions.NEW_SPARSE_VECTOR;
9295
import static org.elasticsearch.inference.TaskType.SPARSE_EMBEDDING;
9396
import static org.elasticsearch.inference.TaskType.TEXT_EMBEDDING;
9497
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
@@ -110,6 +113,8 @@
110113
* A {@link FieldMapper} for semantic text fields.
111114
*/
112115
public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper {
116+
private static final Logger logger = LogManager.getLogger(SemanticTextFieldMapper.class);
117+
public static final String UNSUPPORTED_INDEX_MESSAGE = "[semantic_text] is available on indices created with 8.11 or higher.";
113118
public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix");
114119
public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix");
115120
public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix");
@@ -129,6 +134,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
129134

130135
public static BiConsumer<String, MappingParserContext> validateParserContext(String type) {
131136
return (n, c) -> {
137+
if (c.getIndexSettings().getIndexVersionCreated().before(NEW_SPARSE_VECTOR)) {
138+
throw new UnsupportedOperationException(UNSUPPORTED_INDEX_MESSAGE);
139+
}
132140
if (InferenceMetadataFieldsMapper.isEnabled(c.getIndexSettings().getSettings()) == false) {
133141
notInMultiFields(type).accept(n, c);
134142
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName;
9393
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName;
9494
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
95+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.UNSUPPORTED_INDEX_MESSAGE;
9596
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
9697
import static org.hamcrest.Matchers.containsString;
9798
import static org.hamcrest.Matchers.equalTo;
@@ -339,6 +340,57 @@ public void testInvalidTaskTypes() {
339340
}
340341
}
341342

343+
/**
 * Overrides the inherited {@code boostNotAllowedIndexVersion()} hook so that it returns
 * {@code IndexVersions.NEW_SPARSE_VECTOR}.
 *
 * NOTE(review): presumably the parent test class uses this value as the lower bound of index
 * versions it generates for its boost-related checks, keeping them away from index versions on
 * which {@code semantic_text} is now rejected -- confirm against the parent test class.
 */
@Override
344+
protected IndexVersion boostNotAllowedIndexVersion() {
345+
return IndexVersions.NEW_SPARSE_VECTOR;
346+
}
347+
348+
/**
 * Builds a {@code semantic_text} field mapping whose {@code model_settings} describe a
 * {@code text_embedding} model (384 dimensions, cosine similarity, float elements) and hands it
 * to {@code assertOldIndexUnsupported}, which expects mapper creation to fail.
 *
 * NOTE(review): the local {@code fieldName} is declared but never referenced in this method;
 * it looks like a leftover and could be removed -- confirm nothing else was meant to use it.
 *
 * @throws IOException declared because building the mapping content may throw it
 */
public void testOldIndexSemanticTextDenseVectorRaisesError() throws IOException {
349+
final String fieldName = "field";
350+
final XContentBuilder fieldMapping = fieldMapping(b -> {
351+
b.field("type", "semantic_text");
352+
b.field(INFERENCE_ID_FIELD, "test_inference_id");
353+
b.startObject("model_settings");
354+
b.field("task_type", "text_embedding");
355+
b.field("dimensions", 384);
356+
b.field("similarity", "cosine");
357+
b.field("element_type", "float");
358+
b.endObject();
359+
});
360+
assertOldIndexUnsupported(fieldMapping);
361+
}
362+
363+
/**
 * Builds a field mapping from this test class's {@code minimalMapping} and hands it to
 * {@code assertOldIndexUnsupported}, which expects mapper creation to fail.
 *
 * @throws IOException declared because building the mapping content may throw it
 */
public void testOldIndexSemanticTextMinimalMappingRaisesError() throws IOException {
364+
final XContentBuilder fieldMapping = fieldMapping(this::minimalMapping);
365+
assertOldIndexUnsupported(fieldMapping);
366+
}
367+
368+
/**
 * Builds a {@code semantic_text} field mapping whose {@code model_settings} declare the
 * {@code sparse_embedding} task type and hands it to {@code assertOldIndexUnsupported}, which
 * expects mapper creation to fail.
 *
 * NOTE(review): this test writes the key as the literal {@code "inference_id"} while the dense
 * variant uses the {@code INFERENCE_ID_FIELD} constant; presumably both name the same key --
 * verify, and prefer the constant for consistency.
 *
 * @throws IOException declared because building the mapping content may throw it
 */
public void testOldIndexSemanticTextSparseVersionRaisesError() throws IOException {
369+
final XContentBuilder fieldMapping = fieldMapping(b -> {
370+
b.field("type", "semantic_text");
371+
b.field("inference_id", "another_inference_id");
372+
b.startObject("model_settings");
373+
b.field("task_type", "sparse_embedding");
374+
b.endObject();
375+
});
376+
assertOldIndexUnsupported(fieldMapping);
377+
}
378+
379+
/**
 * Shared assertion for the old-index tests: calls {@code createMapperService} with the given
 * mapping and requires it to throw a {@code MapperParsingException} whose message contains
 * {@code UNSUPPORTED_INDEX_MESSAGE} and whose root cause is an
 * {@code UnsupportedOperationException}.
 *
 * NOTE(review): the last two arguments look like an inclusive index-version range, from
 * {@code V_8_0_0} up to the version immediately before {@code NEW_SPARSE_VECTOR}, and the
 * {@code true} flag presumably selects the legacy format -- confirm both against the
 * {@code createMapperService} overload being called.
 *
 * NOTE(review): {@code assertThat(..., containsString(...))} and an {@code instanceOf} matcher
 * would report the actual message/type on failure, unlike {@code assertTrue}.
 *
 * @param fieldMapping the mapping content expected to be rejected
 */
private void assertOldIndexUnsupported(XContentBuilder fieldMapping) {
380+
381+
MapperParsingException exception = assertThrows(
382+
MapperParsingException.class,
383+
() -> createMapperService(
384+
fieldMapping,
385+
true,
386+
IndexVersions.V_8_0_0,
387+
IndexVersionUtils.getPreviousVersion(IndexVersions.NEW_SPARSE_VECTOR)
388+
)
389+
);
390+
assertTrue(exception.getMessage().contains(UNSUPPORTED_INDEX_MESSAGE));
391+
assertTrue(exception.getRootCause() instanceof UnsupportedOperationException);
392+
}
393+
342394
public void testMultiFieldsSupport() throws IOException {
343395
if (useLegacyFormat) {
344396
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

0 commit comments

Comments
 (0)