Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fallback synthetic source for copy_to and doc_values: false cases #112294

Merged
merged 13 commits into from
Sep 10, 2024
8 changes: 8 additions & 0 deletions docs/changelog/112294.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pr: 112294
summary: "Use fallback synthetic source for `copy_to` and doc_values: false cases"
area: Mapping
type: enhancement
issues:
- 110753
- 110038
- 109546
Original file line number Diff line number Diff line change
Expand Up @@ -283,35 +283,6 @@ public void testOverrideIgnoreDynamicBeyondLimit() throws IOException {
assertThat(ignoreDynamicBeyondLimitIndexSetting, equalTo("false"));
}

public void testAddNonCompatibleMapping() throws IOException {
var nonCompatibleMappingAdditionTemplate = """
{
"template": {
"mappings": {
"properties": {
"bomb": {
"type": "ip",
"doc_values": false
}
}
}
}
}""";

Exception e = assertThrows(
ResponseException.class,
() -> putComponentTemplate(client, "logs@custom", nonCompatibleMappingAdditionTemplate)
);
assertThat(
e.getMessage(),
containsString("updating component template [logs@custom] results in invalid composable template [logs]")
);
assertThat(
e.getMessage(),
containsString("field [bomb] of type [ip] doesn't support synthetic source because it doesn't have doc values")
);
}

private static Map<String, Object> getMapping(final RestClient client, final String indexName) throws IOException {
final Request request = new Request("GET", "/" + indexName + "/_mapping");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.elasticsearch.logsdb.datageneration.DataGenerator;
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.FieldType;
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
Expand Down Expand Up @@ -78,7 +77,18 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequ
}))
.withPredefinedFields(
List.of(
new PredefinedField.WithType("host.name", FieldType.KEYWORD),
// Customized because it always needs doc_values for aggregations.
new PredefinedField.WithGenerator("host.name", new FieldDataGenerator() {
@Override
public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
return b -> b.startObject().field("type", "keyword").endObject();
}

@Override
public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
return b -> b.value(randomAlphaOfLength(5));
}
}),
// Needed for terms query
new PredefinedField.WithGenerator("method", new FieldDataGenerator() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
Expand Down Expand Up @@ -433,22 +432,14 @@ public MatchOnlyTextFieldType fieldType() {
}

@Override
protected SyntheticSourceMode syntheticSourceMode() {
return SyntheticSourceMode.NATIVE;
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (copyTo().copyToFields().isEmpty() != true) {
throw new IllegalArgumentException(
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
}
return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), leafName()) {
protected SyntheticSourceSupport syntheticSourceSupport() {
var loader = new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), leafName()) {
@Override
protected void write(XContentBuilder b, Object value) throws IOException {
b.value((String) value);
}
};

return new SyntheticSourceSupport.Native(loader);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.lucene.search.Query;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;
Expand Down Expand Up @@ -75,9 +74,4 @@ private RankFeatureMetaFieldMapper() {
protected String contentType() {
return CONTENT_TYPE;
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
return SourceLoader.SyntheticFieldLoader.NOTHING;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.SimpleMappedFieldType;
import org.elasticsearch.index.mapper.SortedNumericDocValuesSyntheticFieldLoader;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.TimeSeriesParams;
Expand Down Expand Up @@ -705,32 +704,19 @@ public int docValueCount() {
}

@Override
protected SyntheticSourceMode syntheticSourceMode() {
return SyntheticSourceMode.NATIVE;
}
protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasDocValues) {
kkrik-es marked this conversation as resolved.
Show resolved Hide resolved
var loader = new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) {
@Override
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(decodeForSyntheticSource(value, scalingFactor));
}
};

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (hasDocValues == false) {
throw new IllegalArgumentException(
"field ["
+ fullPath()
+ "] of type ["
+ typeName()
+ "] doesn't support synthetic source because it doesn't have doc values"
);
}
if (copyTo().copyToFields().isEmpty() != true) {
throw new IllegalArgumentException(
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
return new SyntheticSourceSupport.Native(loader);
}
return new SortedNumericDocValuesSyntheticFieldLoader(fullPath(), leafName(), ignoreMalformed.value()) {
@Override
protected void writeValue(XContentBuilder b, long value) throws IOException {
b.value(decodeForSyntheticSource(value, scalingFactor));
}
};

return super.syntheticSourceSupport();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -470,12 +470,7 @@ private void mapping(XContentBuilder b) throws IOException {

@Override
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
return List.of(
new SyntheticSourceInvalidExample(
equalTo("field [field] of type [scaled_float] doesn't support synthetic source because it doesn't have doc values"),
b -> b.field("type", "scaled_float").field("scaling_factor", 10).field("doc_values", false)
)
);
return List.of();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,3 +351,45 @@ tsdb:
"@timestamp" : "2000-01-01T00:00:00.000Z"
"dimension" : "a"
foo: "Apache Lucene powers Elasticsearch"

---
synthetic_source with copy_to:
- requires:
cluster_features: ["mapper.source.synthetic_source_with_copy_to_and_doc_values_false"]
reason: requires copy_to support in synthetic source

- do:
indices.create:
index: synthetic_source_test
body:
mappings:
_source:
mode: synthetic
properties:
foo:
type: match_only_text
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if here we add store: true...are we going to store the value twice?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes but it is not necessarily the same value (e.g. normalizers).

copy_to: copy
copy:
type: keyword

- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache Lucene powers Elasticsearch"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also test with an array of values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add them in a follow-up.


- do:
search:
index: synthetic_source_test
body:
fields: ["copy"]

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
- match:
hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch"


Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextParams;
Expand All @@ -46,7 +45,6 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -570,39 +568,23 @@ public FieldMapper.Builder getMergeBuilder() {
}

@Override
protected SyntheticSourceMode syntheticSourceMode() {
return SyntheticSourceMode.NATIVE;
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (copyTo().copyToFields().isEmpty() != true) {
throw new IllegalArgumentException(
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
}
protected SyntheticSourceSupport syntheticSourceSupport() {
if (fieldType.stored()) {
return new StringStoredFieldFieldLoader(fullPath(), leafName()) {
var loader = new StringStoredFieldFieldLoader(fullPath(), leafName()) {
@Override
protected void write(XContentBuilder b, Object value) throws IOException {
b.value((String) value);
}
};

return new SyntheticSourceSupport.Native(loader);
}

var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
if (kwd != null) {
return kwd.syntheticFieldLoader(leafName());
return new SyntheticSourceSupport.Native(kwd.syntheticFieldLoader(leafName()));
Copy link
Contributor

@salvatore-campagna salvatore-campagna Sep 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can consider naming the two synthetic source modes WithDocValues and WithStoredFields instead of Native and Fallback? Like:

  • SyntheticSourceSupport.WithDocValues
  • SyntheticSourceSupport.WithStoredFields

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's not really the distinction i am trying to make. Yes, _ignored_source is a stored field that holds the value used for synthetic source but it is also possible that the field itself uses a different stored field for synthetic source. That becomes pretty blurry.

The difference between Native and Fallback is who controls synthetic source logic. In Native it's the mapper, in Fallback it's on a layer above and the mapper does not control what happens.

}

throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"field [%s] of type [%s] doesn't support synthetic source unless it is stored or has a sub-field of"
+ " type [keyword] with doc values or stored and without a normalizer",
fullPath(),
typeName()
)
);
return super.syntheticSourceSupport();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,34 @@ multiple values in stored annotated_text field with keyword multi-field:
- match:
hits.hits.0._source:
annotated_text: ["world", "hello", "world"]

---
fallback synthetic source:
- do:
indices.create:
index: test
body:
mappings:
_source:
mode: synthetic
properties:
annotated_text:
type: annotated_text
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as before...if we add store: true do we store twice?

store: false

- do:
index:
index: test
id: 1
refresh: true
body:
annotated_text: ["world", "hello", "world"]

- do:
search:
index: test

- match:
hits.hits.0._source:
annotated_text: ["world", "hello", "world"]

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;

Expand Down Expand Up @@ -97,9 +96,4 @@ public void postParse(DocumentParserContext context) {
public FieldMapper.Builder getMergeBuilder() {
return new Builder().init(this);
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
return SourceLoader.SyntheticFieldLoader.NOTHING;
}
}
Loading