Skip to content

Commit 4035121

Browse files
committed
Correctly identify parent of copy_to destination field for synthetic source purposes
1 parent c56bd7e commit 4035121

File tree

9 files changed

+609
-62
lines changed

9 files changed

+609
-62
lines changed

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 403 additions & 6 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public int get() {
129129
* that copy_to field in introduced using a dynamic template
130130
* in this document and therefore is not present in mapping yet.
131131
*/
132-
private final Set<String> copyToFields;
132+
public final Set<String> copyToFields;
133133

134134
// Indicates if the source for this context has been marked to be recorded. Applies to synthetic source only.
135135
private boolean recordedSource;
@@ -453,20 +453,6 @@ public boolean isFieldAppliedFromTemplate(String name) {
453453

454454
public void markFieldAsCopyTo(String fieldName) {
455455
copyToFields.add(fieldName);
456-
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
457-
/*
458-
Mark this field as containing copied data meaning it should not be present
459-
in synthetic _source (to be consistent with stored _source).
460-
Ignored source values take precedence over standard synthetic source implementation
461-
so by adding this nothing entry we "disable" field in synthetic source.
462-
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
463-
in copied field after reindexing.
464-
465-
Note that this applies to fields that are copied from fields using ignored source themselves
466-
and therefore we don't check for canAddIgnoredField().
467-
*/
468-
ignoredFieldValues.add(IgnoredSourceFieldMapper.NameValue.fromContext(this, fieldName, XContentDataHelper.nothing()));
469-
}
470456
}
471457

472458
public boolean isCopyToDestinationField(String name) {

server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.nio.charset.StandardCharsets;
2828
import java.util.ArrayList;
2929
import java.util.Collections;
30-
import java.util.Comparator;
3130
import java.util.List;
3231
import java.util.Map;
3332
import java.util.Set;
@@ -113,6 +112,10 @@ NameValue cloneWithValue(BytesRef value) {
113112
assert value() == null;
114113
return new NameValue(name, parentOffset, value, doc);
115114
}
115+
116+
boolean hasValue() {
117+
return XContentDataHelper.isDataPresent(value);
118+
}
116119
}
117120

118121
static final class IgnoredValuesFieldMapperType extends StringFieldType {
@@ -147,10 +150,37 @@ protected String contentType() {
147150
@Override
148151
public void postParse(DocumentParserContext context) {
149152
// Ignored values are only expected in synthetic mode.
150-
assert context.getIgnoredFieldValues().isEmpty() || context.mappingLookup().isSourceSynthetic();
153+
if (context.mappingLookup().isSourceSynthetic() == false) {
154+
assert context.getIgnoredFieldValues().isEmpty();
155+
return;
156+
}
157+
151158
List<NameValue> ignoredFieldValues = new ArrayList<>(context.getIgnoredFieldValues());
152-
// ensure consistent ordering when retrieving synthetic source
153-
Collections.sort(ignoredFieldValues, Comparator.comparing(NameValue::name));
159+
if (indexSettings.getSkipIgnoredSourceWrite() == false) {
160+
/*
161+
Mark this field as containing copied data meaning it should not be present
162+
in synthetic _source (to be consistent with stored _source).
163+
Ignored source values take precedence over standard synthetic source implementation
164+
so by adding this nothing entry we "disable" field in synthetic source.
165+
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
166+
in copied field after reindexing.
167+
*/
168+
for (String copyToField : context.copyToFields) {
169+
ObjectMapper parent = context.parent().findParentMapper(copyToField);
170+
if (parent == null) {
171+
// There are scenarios when this can happen:
172+
// 1. all values of the field that is the source of copy_to are null
173+
// 2. copy_to points at a field inside disabled object
174+
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
175+
continue;
176+
}
177+
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
178+
ignoredFieldValues.add(
179+
new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.nothing(), context.doc())
180+
);
181+
}
182+
}
183+
154184
for (NameValue nameValue : ignoredFieldValues) {
155185
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
156186
}

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ public Set<NodeFeature> getFeatures() {
4040
Mapper.SYNTHETIC_SOURCE_KEEP_FEATURE,
4141
SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT,
4242
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX,
43-
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT
43+
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT,
44+
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX
4445
);
4546
}
4647
}

server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java

Lines changed: 91 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,37 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
808808

809809
}
810810

811+
ObjectMapper findParentMapper(String leafFieldPath) {
812+
ObjectMapper current = this;
813+
String pathInCurrent = leafFieldPath;
814+
815+
while (current != null) {
816+
if (current.mappers.containsKey(pathInCurrent)) {
817+
return current;
818+
}
819+
820+
// Go one level down if possible
821+
var pathComponents = pathInCurrent.split("\\.");
822+
var prefixParts = new ArrayList<String>();
823+
824+
var parent = current;
825+
current = null;
826+
for (int i = 0; i < pathComponents.length - 1; i++) {
827+
prefixParts.add(pathComponents[i]);
828+
var childMapperName = String.join(".", prefixParts);
829+
var childMapper = parent.mappers.get(childMapperName);
830+
831+
if (childMapper instanceof ObjectMapper objectMapper) {
832+
current = objectMapper;
833+
pathInCurrent = pathInCurrent.substring(childMapperName.length() + 1);
834+
break;
835+
}
836+
}
837+
}
838+
839+
return null;
840+
}
841+
811842
protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream<Mapper> mappers, boolean isFragment) {
812843
var fields = mappers.sorted(Comparator.comparing(Mapper::fullPath))
813844
.map(Mapper::syntheticFieldLoader)
@@ -828,10 +859,16 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
828859
private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader {
829860
private final List<SourceLoader.SyntheticFieldLoader> fields;
830861
private final boolean isFragment;
862+
831863
private boolean storedFieldLoadersHaveValues;
832864
private boolean docValuesLoadersHaveValues;
833865
private boolean ignoredValuesPresent;
834866
private List<IgnoredSourceFieldMapper.NameValue> ignoredValues;
867+
// If this loader has anything to write.
868+
// In special cases this can be false even if doc values loaders or stored field loaders
869+
// have values.
870+
private boolean writersHaveValues;
871+
private TreeMap<String, FieldWriter> currentWriters;
835872

836873
private SyntheticSourceFieldLoader(List<SourceLoader.SyntheticFieldLoader> fields, boolean isFragment) {
837874
this.fields = fields;
@@ -882,22 +919,69 @@ public boolean advanceToDoc(int docId) throws IOException {
882919
}
883920
}
884921

922+
@Override
923+
public void prepare() {
924+
if ((storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent) == false) {
925+
writersHaveValues = false;
926+
return;
927+
}
928+
929+
for (var loader : fields) {
930+
// Currently this logic is only relevant for object loaders.
931+
if (loader instanceof ObjectMapper.SyntheticSourceFieldLoader objectSyntheticFieldLoader) {
932+
objectSyntheticFieldLoader.prepare();
933+
}
934+
}
935+
936+
currentWriters = new TreeMap<>();
937+
938+
if (ignoredValues != null && ignoredValues.isEmpty() == false) {
939+
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
940+
if (value.hasValue()) {
941+
writersHaveValues |= true;
942+
}
943+
944+
var existing = currentWriters.get(value.name());
945+
if (existing == null) {
946+
currentWriters.put(value.name(), new FieldWriter.IgnoredSource(value));
947+
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
948+
isw.mergeWith(value);
949+
}
950+
}
951+
}
952+
953+
for (SourceLoader.SyntheticFieldLoader field : fields) {
954+
if (field.hasValue()) {
955+
if (currentWriters.containsKey(field.fieldName()) == false) {
956+
writersHaveValues |= true;
957+
currentWriters.put(field.fieldName(), new FieldWriter.FieldLoader(field));
958+
} else {
959+
// Skip if the field source is stored separately, to avoid double-printing.
960+
// Make sure to reset the state of loader so that values stored inside will not
961+
// be used after this document is finished.
962+
field.reset();
963+
}
964+
}
965+
}
966+
}
967+
885968
@Override
886969
public boolean hasValue() {
887-
return storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent;
970+
return writersHaveValues;
888971
}
889972

890973
@Override
891974
public void write(XContentBuilder b) throws IOException {
892975
if (hasValue() == false) {
893976
return;
894977
}
978+
895979
if (isRoot() && isEnabled() == false) {
896980
// If the root object mapper is disabled, it is expected to contain
897981
// the source encapsulated within a single ignored source value.
898982
assert ignoredValues.size() == 1 : ignoredValues.size();
899983
XContentDataHelper.decodeAndWrite(b, ignoredValues.get(0).value());
900-
ignoredValues = null;
984+
softReset();
901985
return;
902986
}
903987

@@ -907,41 +991,10 @@ public void write(XContentBuilder b) throws IOException {
907991
b.startObject(leafName());
908992
}
909993

910-
if (ignoredValues != null && ignoredValues.isEmpty() == false) {
911-
// Use an ordered map between field names and writer functions, to order writing by field name.
912-
Map<String, FieldWriter> orderedFields = new TreeMap<>();
913-
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
914-
var existing = orderedFields.get(value.name());
915-
if (existing == null) {
916-
orderedFields.put(value.name(), new FieldWriter.IgnoredSource(value));
917-
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
918-
isw.mergeWith(value);
919-
}
920-
}
921-
for (SourceLoader.SyntheticFieldLoader field : fields) {
922-
if (field.hasValue()) {
923-
if (orderedFields.containsKey(field.fieldName()) == false) {
924-
orderedFields.put(field.fieldName(), new FieldWriter.FieldLoader(field));
925-
} else {
926-
// Skip if the field source is stored separately, to avoid double-printing.
927-
// Make sure to reset the state of loader so that values stored inside will not
928-
// be used after this document is finished.
929-
field.reset();
930-
}
931-
}
932-
}
933-
934-
for (var writer : orderedFields.values()) {
935-
writer.writeTo(b);
936-
}
937-
ignoredValues = null;
938-
} else {
939-
for (SourceLoader.SyntheticFieldLoader field : fields) {
940-
if (field.hasValue()) {
941-
field.write(b);
942-
}
943-
}
994+
for (var writer : currentWriters.values()) {
995+
writer.writeTo(b);
944996
}
997+
945998
b.endObject();
946999
softReset();
9471000
}
@@ -957,6 +1010,8 @@ private void softReset() {
9571010
storedFieldLoadersHaveValues = false;
9581011
docValuesLoadersHaveValues = false;
9591012
ignoredValuesPresent = false;
1013+
ignoredValues = null;
1014+
writersHaveValues = false;
9601015
}
9611016

9621017
@Override

server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ public class SourceFieldMapper extends MetadataFieldMapper {
4848
"mapper.source.synthetic_source_with_copy_to_and_doc_values_false"
4949
);
5050
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_FIX = new NodeFeature("mapper.source.synthetic_source_copy_to_fix");
51+
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX = new NodeFeature(
52+
"mapper.source.synthetic_source_copy_to_inside_objects_fix"
53+
);
5154

5255
public static final String NAME = "_source";
5356
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";

server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,9 @@ public void write(LeafStoredFieldLoader storedFieldLoader, int docId, XContentBu
209209
if (docValuesLoader != null) {
210210
docValuesLoader.advanceToDoc(docId);
211211
}
212+
213+
loader.prepare();
214+
212215
// TODO accept a requested xcontent type
213216
if (loader.hasValue()) {
214217
loader.write(b);
@@ -299,6 +302,15 @@ public String fieldName() {
299302
*/
300303
DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException;
301304

305+
/**
306+
Perform any preprocessing needed before producing synthetic source.
307+
The expectation is for this method to be called before {@link SyntheticFieldLoader#hasValue()}
308+
and {@link SyntheticFieldLoader#write(XContentBuilder)} are used.
309+
*/
310+
default void prepare() {
311+
// Noop
312+
}
313+
302314
/**
303315
* Has this field loaded any values for this document?
304316
*/

server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,37 @@ public void testStoredNestedSubObjectWithNameOverlappingParentName() throws IOEx
15181518
{"path":{"at":{"foo":"A"}}}""", syntheticSource);
15191519
}
15201520

1521+
public void testCopyToLogicInsideObject() throws IOException {
1522+
DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> {
1523+
b.startObject("path");
1524+
b.startObject("properties");
1525+
{
1526+
b.startObject("at").field("type", "keyword").field("copy_to", "copy_top.copy").endObject();
1527+
}
1528+
b.endObject();
1529+
b.endObject();
1530+
b.startObject("copy_top");
1531+
b.startObject("properties");
1532+
{
1533+
b.startObject("copy").field("type", "keyword").endObject();
1534+
}
1535+
b.endObject();
1536+
b.endObject();
1537+
})).documentMapper();
1538+
1539+
CheckedConsumer<XContentBuilder, IOException> document = b -> {
1540+
b.startObject("path");
1541+
b.field("at", "A");
1542+
b.endObject();
1543+
};
1544+
1545+
var doc = documentMapper.parse(source(document));
1546+
assertNotNull(doc.docs().get(0).getField("copy_top.copy"));
1547+
1548+
var syntheticSource = syntheticSource(documentMapper, document);
1549+
assertEquals("{\"path\":{\"at\":\"A\"}}", syntheticSource);
1550+
}
1551+
15211552
protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader)
15221553
throws IOException {
15231554
// We exclude ignored source field since in some cases it contains an exact copy of a part of document source.

server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,4 +811,36 @@ public void testFlattenExplicitSubobjectsTrue() {
811811
exception.getMessage()
812812
);
813813
}
814+
815+
public void testFindParentMapper() {
816+
MapperBuilderContext rootContext = MapperBuilderContext.root(false, false);
817+
818+
var rootBuilder = new RootObjectMapper.Builder("_doc", Optional.empty());
819+
rootBuilder.add(new KeywordFieldMapper.Builder("keyword", IndexVersion.current()));
820+
821+
var child = new ObjectMapper.Builder("child", Optional.empty());
822+
child.add(new KeywordFieldMapper.Builder("keyword2", IndexVersion.current()));
823+
child.add(new KeywordFieldMapper.Builder("keyword.with.dot", IndexVersion.current()));
824+
rootBuilder.add(child);
825+
826+
var childWithDot = new ObjectMapper.Builder("childwith.dot", Optional.empty());
827+
childWithDot.add(new KeywordFieldMapper.Builder("keyword3", IndexVersion.current()));
828+
childWithDot.add(new KeywordFieldMapper.Builder("keyword4.with.dot", IndexVersion.current()));
829+
rootBuilder.add(childWithDot);
830+
831+
RootObjectMapper root = rootBuilder.build(rootContext);
832+
833+
assertEquals("_doc", root.findParentMapper("keyword").fullPath());
834+
assertNull(root.findParentMapper("aa"));
835+
836+
assertEquals("child", root.findParentMapper("child.keyword2").fullPath());
837+
assertEquals("child", root.findParentMapper("child.keyword.with.dot").fullPath());
838+
assertNull(root.findParentMapper("child.long"));
839+
assertNull(root.findParentMapper("child.long.hello"));
840+
841+
assertEquals("childwith.dot", root.findParentMapper("childwith.dot.keyword3").fullPath());
842+
assertEquals("childwith.dot", root.findParentMapper("childwith.dot.keyword4.with.dot").fullPath());
843+
assertNull(root.findParentMapper("childwith.dot.long"));
844+
assertNull(root.findParentMapper("childwith.dot.long.hello"));
845+
}
814846
}

0 commit comments

Comments
 (0)