Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public int get() {
private Field version;
private final SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;

/**
* Fields that are copied from values of other fields via copy_to.
* This per-document state is needed since it is possible
Expand Down Expand Up @@ -453,26 +454,16 @@ public boolean isFieldAppliedFromTemplate(String name) {

public void markFieldAsCopyTo(String fieldName) {
copyToFields.add(fieldName);
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
/*
Mark this field as containing copied data meaning it should not be present
in synthetic _source (to be consistent with stored _source).
Ignored source values take precedence over standard synthetic source implementation
so by adding this nothing entry we "disable" field in synthetic source.
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
in copied field after reindexing.

Note that this applies to fields that are copied from fields using ignored source themselves
and therefore we don't check for canAddIgnoredField().
*/
ignoredFieldValues.add(IgnoredSourceFieldMapper.NameValue.fromContext(this, fieldName, XContentDataHelper.nothing()));
}
}

public boolean isCopyToDestinationField(String name) {
return copyToFields.contains(name);
}

public Set<String> getCopyToFields() {
return copyToFields;
}

/**
* Add a new mapper dynamically created while parsing.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
Expand Down Expand Up @@ -113,6 +112,10 @@ NameValue cloneWithValue(BytesRef value) {
assert value() == null;
return new NameValue(name, parentOffset, value, doc);
}

boolean hasValue() {
return XContentDataHelper.isDataPresent(value);
}
}

static final class IgnoredValuesFieldMapperType extends StringFieldType {
Expand Down Expand Up @@ -147,11 +150,38 @@ protected String contentType() {
@Override
public void postParse(DocumentParserContext context) {
// Ignored values are only expected in synthetic mode.
assert context.getIgnoredFieldValues().isEmpty() || context.mappingLookup().isSourceSynthetic();
List<NameValue> ignoredFieldValues = new ArrayList<>(context.getIgnoredFieldValues());
// ensure consistent ordering when retrieving synthetic source
Collections.sort(ignoredFieldValues, Comparator.comparing(NameValue::name));
for (NameValue nameValue : ignoredFieldValues) {
if (context.mappingLookup().isSourceSynthetic() == false) {
assert context.getIgnoredFieldValues().isEmpty();
return;
}

Collection<NameValue> ignoredValuesToWrite = context.getIgnoredFieldValues();
if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) {
/*
Mark fields as containing copied data meaning they should not be present
in synthetic _source (to be consistent with stored _source).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: update text, no given field here..

Ignored source values take precedence over standard synthetic source implementation
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
in copied field after reindexing.
*/
var mutableList = new ArrayList<>(ignoredValuesToWrite);
for (String copyToField : context.getCopyToFields()) {
ObjectMapper parent = context.parent().findParentMapper(copyToField);
if (parent == null) {
// There are scenarios when this can happen:
// 1. all values of the field that is the source of copy_to are null
// 2. copy_to points at a field inside a disabled object
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
continue;
}
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc()));
}
ignoredValuesToWrite = mutableList;
}

for (NameValue nameValue : ignoredValuesToWrite) {
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ public Set<NodeFeature> getFeatures() {
Mapper.SYNTHETIC_SOURCE_KEEP_FEATURE,
SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT,
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX,
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT
FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT,
SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX
);
}
}
159 changes: 119 additions & 40 deletions server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,42 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep

}

ObjectMapper findParentMapper(String leafFieldPath) {
var pathComponents = leafFieldPath.split("\\.");
int startPathComponent = 0;

ObjectMapper current = this;
String pathInCurrent = leafFieldPath;

while (current != null) {
if (current.mappers.containsKey(pathInCurrent)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if we have to check for pathInCurrent to be null since we pass it to containsKey. leafFieldPath is copyToFields right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't think of a scenario when we would add null as a copy to field in document parser context. I am not sure what this code should do in that case too.

return current;
}

// Go one level down if possible
var parent = current;
current = null;

var childMapperName = new StringBuilder();
for (int i = startPathComponent; i < pathComponents.length - 1; i++) {
if (childMapperName.isEmpty() == false) {
childMapperName.append(".");
}
childMapperName.append(pathComponents[i]);

var childMapper = parent.mappers.get(childMapperName.toString());
if (childMapper instanceof ObjectMapper objectMapper) {
current = objectMapper;
startPathComponent = i + 1;
pathInCurrent = pathInCurrent.substring(childMapperName.length() + 1);
break;
}
}
}

return null;
}

protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream<Mapper> mappers, boolean isFragment) {
var fields = mappers.sorted(Comparator.comparing(Mapper::fullPath))
.map(Mapper::syntheticFieldLoader)
Expand All @@ -828,10 +864,18 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader {
private final List<SourceLoader.SyntheticFieldLoader> fields;
private final boolean isFragment;

private boolean storedFieldLoadersHaveValues;
private boolean docValuesLoadersHaveValues;
private boolean ignoredValuesPresent;
private List<IgnoredSourceFieldMapper.NameValue> ignoredValues;
// If this loader has anything to write.
// In special cases this can be false even if doc values loaders or stored field loaders
// have values.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: reference the copy_to case as an example?

// F.e. objects that only contain fields that are destinations of copy_to.
private boolean writersHaveValues;
// Use an ordered map between field names and writers to order writing by field name.
private TreeMap<String, FieldWriter> currentWriters;

private SyntheticSourceFieldLoader(List<SourceLoader.SyntheticFieldLoader> fields, boolean isFragment) {
this.fields = fields;
Expand Down Expand Up @@ -882,22 +926,69 @@ public boolean advanceToDoc(int docId) throws IOException {
}
}

@Override
public void prepare() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kinda like this version, compared to running everything in write().

if ((storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent) == false) {
writersHaveValues = false;
return;
}

for (var loader : fields) {
// Currently this logic is only relevant for object loaders.
if (loader instanceof ObjectMapper.SyntheticSourceFieldLoader objectSyntheticFieldLoader) {
objectSyntheticFieldLoader.prepare();
}
}

currentWriters = new TreeMap<>();

if (ignoredValues != null && ignoredValues.isEmpty() == false) {
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
if (value.hasValue()) {
writersHaveValues |= true;
}

var existing = currentWriters.get(value.name());
if (existing == null) {
currentWriters.put(value.name(), new FieldWriter.IgnoredSource(value));
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
isw.mergeWith(value);
}
}
}

for (SourceLoader.SyntheticFieldLoader field : fields) {
if (field.hasValue()) {
if (currentWriters.containsKey(field.fieldName()) == false) {
writersHaveValues |= true;
currentWriters.put(field.fieldName(), new FieldWriter.FieldLoader(field));
} else {
// Skip if the field source is stored separately, to avoid double-printing.
// Make sure to reset the state of loader so that values stored inside will not
// be used after this document is finished.
field.reset();
}
}
}
}

@Override
public boolean hasValue() {
return storedFieldLoadersHaveValues || docValuesLoadersHaveValues || ignoredValuesPresent;
return writersHaveValues;
}

@Override
public void write(XContentBuilder b) throws IOException {
if (hasValue() == false) {
return;
}

if (isRoot() && isEnabled() == false) {
// If the root object mapper is disabled, it is expected to contain
// the source encapsulated within a single ignored source value.
assert ignoredValues.size() == 1 : ignoredValues.size();
XContentDataHelper.decodeAndWrite(b, ignoredValues.get(0).value());
ignoredValues = null;
softReset();
return;
}

Expand All @@ -907,41 +998,12 @@ public void write(XContentBuilder b) throws IOException {
b.startObject(leafName());
}

if (ignoredValues != null && ignoredValues.isEmpty() == false) {
// Use an ordered map between field names and writer functions, to order writing by field name.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy this comment.

Map<String, FieldWriter> orderedFields = new TreeMap<>();
for (IgnoredSourceFieldMapper.NameValue value : ignoredValues) {
var existing = orderedFields.get(value.name());
if (existing == null) {
orderedFields.put(value.name(), new FieldWriter.IgnoredSource(value));
} else if (existing instanceof FieldWriter.IgnoredSource isw) {
isw.mergeWith(value);
}
}
for (SourceLoader.SyntheticFieldLoader field : fields) {
if (field.hasValue()) {
if (orderedFields.containsKey(field.fieldName()) == false) {
orderedFields.put(field.fieldName(), new FieldWriter.FieldLoader(field));
} else {
// Skip if the field source is stored separately, to avoid double-printing.
// Make sure to reset the state of loader so that values stored inside will not
// be used after this document is finished.
field.reset();
}
}
}

for (var writer : orderedFields.values()) {
for (var writer : currentWriters.values()) {
if (writer.hasValue()) {
writer.writeTo(b);
}
ignoredValues = null;
} else {
for (SourceLoader.SyntheticFieldLoader field : fields) {
if (field.hasValue()) {
field.write(b);
}
}
}

b.endObject();
softReset();
}
Expand All @@ -957,6 +1019,8 @@ private void softReset() {
storedFieldLoadersHaveValues = false;
docValuesLoadersHaveValues = false;
ignoredValuesPresent = false;
ignoredValues = null;
writersHaveValues = false;
}

@Override
Expand Down Expand Up @@ -986,34 +1050,49 @@ public String fieldName() {
interface FieldWriter {
void writeTo(XContentBuilder builder) throws IOException;

boolean hasValue();

record FieldLoader(SourceLoader.SyntheticFieldLoader loader) implements FieldWriter {
@Override
public void writeTo(XContentBuilder builder) throws IOException {
loader.write(builder);
}

@Override
public boolean hasValue() {
return loader.hasValue();
}
}

class IgnoredSource implements FieldWriter {
private final String fieldName;
private final String leafName;
private final List<BytesRef> values;
private final List<BytesRef> encodedValues;

IgnoredSource(IgnoredSourceFieldMapper.NameValue initialValue) {
this.fieldName = initialValue.name();
this.leafName = initialValue.getFieldName();
this.values = new ArrayList<>();
this.values.add(initialValue.value());
this.encodedValues = new ArrayList<>();
if (initialValue.hasValue()) {
this.encodedValues.add(initialValue.value());
}
}

@Override
public void writeTo(XContentBuilder builder) throws IOException {
XContentDataHelper.writeMerged(builder, leafName, values);
XContentDataHelper.writeMerged(builder, leafName, encodedValues);
}

@Override
public boolean hasValue() {
return encodedValues.isEmpty() == false;
}

public FieldWriter mergeWith(IgnoredSourceFieldMapper.NameValue nameValue) {
assert Objects.equals(nameValue.name(), fieldName) : "IgnoredSource is merged with wrong field data";

values.add(nameValue.value());
if (nameValue.hasValue()) {
encodedValues.add(nameValue.value());
}
return this;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ public class SourceFieldMapper extends MetadataFieldMapper {
"mapper.source.synthetic_source_with_copy_to_and_doc_values_false"
);
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_FIX = new NodeFeature("mapper.source.synthetic_source_copy_to_fix");
public static final NodeFeature SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX = new NodeFeature(
"mapper.source.synthetic_source_copy_to_inside_objects_fix"
);

public static final String NAME = "_source";
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ public void write(LeafStoredFieldLoader storedFieldLoader, int docId, XContentBu
if (docValuesLoader != null) {
docValuesLoader.advanceToDoc(docId);
}

loader.prepare();

// TODO accept a requested xcontent type
if (loader.hasValue()) {
loader.write(b);
Expand Down Expand Up @@ -299,6 +302,16 @@ public String fieldName() {
*/
DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException;

/**
Perform any preprocessing needed before producing synthetic source
and deduce whether this mapper (and its children, if any) have values to write.
The expectation is for this method to be called before {@link SyntheticFieldLoader#hasValue()}
and {@link SyntheticFieldLoader#write(XContentBuilder)} are used.
*/
default void prepare() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prepareHasValue? to make it more concrete.

Copy link
Contributor Author

@lkts lkts Sep 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this more because it's a verb and it does not have has in there which has boolean associations.

// Noop
}

/**
* Has this field loaded any values for this document?
*/
Expand Down
Loading