Skip to content

Commit

Permalink
[Remove] Type from nested fields using new metadata field mapper (#3004)
Browse files Browse the repository at this point in the history
* [Remove] Type from nested fields using new metadata field mapper

types support is removed yet nested documents use the _type field to store the
path for nested documents. A new _nested_path metadata field mapper is added to
take the place of the _type field in order to remove the type dependency in
nested documents. BWC is handled in the new field mapper to ensure compatibility
with older versions.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>

* pr fixes

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>

* add test to merge same mapping with empty index settings

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
  • Loading branch information
nknize authored Apr 26, 2022
1 parent c5ff8d6 commit 6b641d2
Show file tree
Hide file tree
Showing 21 changed files with 264 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,23 @@ private static void innerParseObject(
private static void nested(ParseContext context, ObjectMapper.Nested nested) {
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
Version indexVersion = context.indexSettings().getIndexVersionCreated();
if (nested.isIncludeInParent()) {
addFields(nestedDoc, parentDoc);
addFields(indexVersion, nestedDoc, parentDoc);
}
if (nested.isIncludeInRoot()) {
ParseContext.Document rootDoc = context.rootDoc();
// don't add it twice, if its included in parent, and we are handling the master doc...
if (!nested.isIncludeInParent() || parentDoc != rootDoc) {
addFields(nestedDoc, rootDoc);
addFields(indexVersion, nestedDoc, rootDoc);
}
}
}

private static void addFields(ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
private static void addFields(Version indexVersion, ParseContext.Document nestedDoc, ParseContext.Document rootDoc) {
String nestedPathFieldName = NestedPathFieldMapper.name(indexVersion);
for (IndexableField field : nestedDoc.getFields()) {
if (!field.name().equals(TypeFieldMapper.NAME)) {
if (field.name().equals(nestedPathFieldName) == false) {
rootDoc.add(field);
}
}
Expand Down Expand Up @@ -498,7 +500,7 @@ private static ParseContext nestedContext(ParseContext context, ObjectMapper map
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, mapper.nestedTypePathAsString(), TypeFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(NestedPathFieldMapper.field(context.indexSettings().getIndexVersionCreated(), mapper.nestedTypePath()));
return context;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.Version;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.lookup.SearchLookup;

import java.util.Collections;

public class NestedPathFieldMapper extends MetadataFieldMapper {
// OpenSearch version 2.0 removed types; this name is used for bwc
public static final String LEGACY_NAME = "_type";
public static final String NAME = "_nested_path";

public static class Defaults {
public static final FieldType FIELD_TYPE = new FieldType();
static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
}

/** private ctor; using SINGLETON to control BWC */
private NestedPathFieldMapper(String name) {
super(new NestedPathFieldType(name));
}

/** returns the field name */
public static String name(Version version) {
if (version.before(Version.V_2_0_0)) {
return LEGACY_NAME;
}
return NAME;
}

@Override
protected String contentType() {
return NAME;
}

private static final NestedPathFieldMapper LEGACY_INSTANCE = new NestedPathFieldMapper(LEGACY_NAME);
private static final NestedPathFieldMapper INSTANCE = new NestedPathFieldMapper(NAME);

public static final TypeParser PARSER = new FixedTypeParser(
c -> c.indexVersionCreated().before(Version.V_2_0_0) ? LEGACY_INSTANCE : INSTANCE
);

/** helper method to create a lucene field based on the opensearch version */
public static Field field(Version version, String path) {
return new Field(name(version), path, Defaults.FIELD_TYPE);
}

/** helper method to create a query based on the opensearch version */
public static Query filter(Version version, String path) {
return new TermQuery(new Term(name(version), new BytesRef(path)));
}

/** field type for the NestPath field */
public static final class NestedPathFieldType extends StringFieldType {
private NestedPathFieldType(String name) {
super(name, true, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, Collections.emptyMap());
}

@Override
public String typeName() {
return NAME;
}

@Override
public Query existsQuery(QueryShardContext context) {
throw new UnsupportedOperationException("Cannot run exists() query against the nested field path");
}

@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
}
}
}
21 changes: 11 additions & 10 deletions server/src/main/java/org/opensearch/index/mapper/ObjectMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@

package org.opensearch.index.mapper;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.OpenSearchParseException;
import org.opensearch.Version;
import org.opensearch.common.Explicit;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.CopyOnWriteHashMap;
Expand Down Expand Up @@ -388,8 +386,7 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin

private final Nested nested;

private final String nestedTypePathAsString;
private final BytesRef nestedTypePathAsBytes;
private final String nestedTypePath;

private final Query nestedTypeFilter;

Expand Down Expand Up @@ -420,9 +417,13 @@ protected static void parseProperties(ObjectMapper.Builder objBuilder, Map<Strin
} else {
this.mappers = CopyOnWriteHashMap.copyOf(mappers);
}
this.nestedTypePathAsString = "__" + fullPath;
this.nestedTypePathAsBytes = new BytesRef(nestedTypePathAsString);
this.nestedTypeFilter = new TermQuery(new Term(TypeFieldMapper.NAME, nestedTypePathAsBytes));
Version version = Version.indexCreated(settings);
if (version.before(Version.V_2_0_0)) {
this.nestedTypePath = "__" + fullPath;
} else {
this.nestedTypePath = fullPath;
}
this.nestedTypeFilter = NestedPathFieldMapper.filter(version, nestedTypePath);
}

@Override
Expand Down Expand Up @@ -486,8 +487,8 @@ public String fullPath() {
return this.fullPath;
}

public String nestedTypePathAsString() {
return nestedTypePathAsString;
public String nestedTypePath() {
return nestedTypePath;
}

public final Dynamic dynamic() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.MetadataFieldMapper;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.index.mapper.NumberFieldMapper;
import org.opensearch.index.mapper.ObjectMapper;
import org.opensearch.index.mapper.RangeType;
Expand Down Expand Up @@ -184,6 +185,7 @@ private static Map<String, MetadataFieldMapper.TypeParser> initBuiltInMetadataMa
builtInMetadataMappers.put(IndexFieldMapper.NAME, IndexFieldMapper.PARSER);
builtInMetadataMappers.put(DataStreamFieldMapper.NAME, DataStreamFieldMapper.PARSER);
builtInMetadataMappers.put(SourceFieldMapper.NAME, SourceFieldMapper.PARSER);
builtInMetadataMappers.put(NestedPathFieldMapper.NAME, NestedPathFieldMapper.PARSER);
builtInMetadataMappers.put(VersionFieldMapper.NAME, VersionFieldMapper.PARSER);
builtInMetadataMappers.put(SeqNoFieldMapper.NAME, SeqNoFieldMapper.PARSER);
// _field_names must be added last so that it has a chance to see all the other mappers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.opensearch.Version;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.MetadataFieldMapper;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.plugins.MapperPlugin;

import java.util.Collections;
Expand All @@ -50,6 +51,7 @@ public final class MapperRegistry {

private final Map<String, Mapper.TypeParser> mapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers;
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsersPre20;
private final Function<String, Predicate<String>> fieldFilter;

public MapperRegistry(
Expand All @@ -59,6 +61,9 @@ public MapperRegistry(
) {
this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers));
this.metadataMapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(metadataMapperParsers));
Map<String, MetadataFieldMapper.TypeParser> tempPre20 = new LinkedHashMap<>(metadataMapperParsers);
tempPre20.remove(NestedPathFieldMapper.NAME);
this.metadataMapperParsersPre20 = Collections.unmodifiableMap(tempPre20);
this.fieldFilter = fieldFilter;
}

Expand All @@ -75,7 +80,7 @@ public Map<String, Mapper.TypeParser> getMapperParsers() {
* returned map uses the name of the field as a key.
*/
public Map<String, MetadataFieldMapper.TypeParser> getMetadataMapperParsers(Version indexCreatedVersion) {
return metadataMapperParsers;
return indexCreatedVersion.onOrAfter(Version.V_2_0_0) ? metadataMapperParsers : metadataMapperParsersPre20;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,14 @@ public void testNestedHaveIdAndTypeFields() throws Exception {
assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(0).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
assertNotNull(result.docs().get(0).getField(NestedPathFieldMapper.NAME));
assertEquals("foo", result.docs().get(0).getField(NestedPathFieldMapper.NAME).stringValue());
assertEquals("value1", result.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
// Root document:
assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
assertEquals(Uid.encodeId("1"), result.docs().get(1).getField(IdFieldMapper.NAME).binaryValue());
assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
assertNull(result.docs().get(1).getField(NestedPathFieldMapper.NAME));
assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ private static ObjectMapper createObjectMapper(String name) {
ObjectMapper.Nested.NO,
ObjectMapper.Dynamic.FALSE,
emptyMap(),
Settings.EMPTY
SETTINGS
);
}

Expand All @@ -232,7 +232,7 @@ private static ObjectMapper createNestedObjectMapper(String name) {
ObjectMapper.Nested.newNested(),
ObjectMapper.Dynamic.FALSE,
emptyMap(),
Settings.EMPTY
SETTINGS
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public void testSingleNested() throws Exception {
);

assertThat(doc.docs().size(), equalTo(2));
assertThat(doc.docs().get(0).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(0).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(0).get("nested1.field1"), equalTo("1"));
assertThat(doc.docs().get(0).get("nested1.field2"), equalTo("2"));

Expand Down Expand Up @@ -180,10 +180,10 @@ public void testSingleNested() throws Exception {
);

assertThat(doc.docs().size(), equalTo(3));
assertThat(doc.docs().get(0).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(0).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(0).get("nested1.field1"), equalTo("1"));
assertThat(doc.docs().get(0).get("nested1.field2"), equalTo("2"));
assertThat(doc.docs().get(1).get(TypeFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePathAsString()));
assertThat(doc.docs().get(1).get(NestedPathFieldMapper.NAME), equalTo(nested1Mapper.nestedTypePath()));
assertThat(doc.docs().get(1).get("nested1.field1"), equalTo("3"));
assertThat(doc.docs().get(1).get("nested1.field2"), equalTo("4"));

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.IndexableField;
import org.opensearch.common.bytes.BytesArray;
import org.opensearch.common.compress.CompressedXContent;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.test.OpenSearchSingleNodeTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;

/** tests for {@link org.opensearch.index.mapper.NestedPathFieldMapper} */
public class NestedPathFieldMapperTests extends OpenSearchSingleNodeTestCase {

public void testDefaultConfig() throws IOException {
Settings indexSettings = Settings.EMPTY;
MapperService mapperService = createIndex("test", indexSettings).mapperService();
DocumentMapper mapper = mapperService.merge(
MapperService.SINGLE_MAPPING_NAME,
new CompressedXContent("{\"" + MapperService.SINGLE_MAPPING_NAME + "\":{}}"),
MapperService.MergeReason.MAPPING_UPDATE
);
ParsedDocument document = mapper.parse(new SourceToParse("index", "id", new BytesArray("{}"), XContentType.JSON));
assertEquals(Collections.<IndexableField>emptyList(), Arrays.asList(document.rootDoc().getFields(NestedPathFieldMapper.NAME)));
}

public void testUpdatesWithSameMappings() throws IOException {
Settings indexSettings = Settings.EMPTY;
MapperService mapperService = createIndex("test", indexSettings).mapperService();
DocumentMapper mapper = mapperService.merge(
MapperService.SINGLE_MAPPING_NAME,
new CompressedXContent("{\"" + MapperService.SINGLE_MAPPING_NAME + "\":{}}"),
MapperService.MergeReason.MAPPING_UPDATE
);
mapper.merge(mapper.mapping(), MapperService.MergeReason.MAPPING_UPDATE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.index.IndexService;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.NestedPathFieldMapper;
import org.opensearch.index.query.MatchAllQueryBuilder;
import org.opensearch.index.query.NestedQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
Expand Down Expand Up @@ -324,7 +325,7 @@ public void testNested() throws IOException {

Query expectedChildQuery = new BooleanQuery.Builder().add(new MatchAllDocsQuery(), Occur.MUST)
// we automatically add a filter since the inner query might match non-nested docs
.add(new TermQuery(new Term("_type", "__nested1")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested1")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand Down Expand Up @@ -352,7 +353,7 @@ public void testNested() throws IOException {

// we need to add the filter again because of include_in_parent
expectedChildQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("nested2.foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("_type", "__nested2")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested2")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand All @@ -367,7 +368,7 @@ public void testNested() throws IOException {

// we need to add the filter again because of include_in_root
expectedChildQuery = new BooleanQuery.Builder().add(new TermQuery(new Term("nested3.foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("_type", "__nested3")), Occur.FILTER)
.add(new TermQuery(new Term(NestedPathFieldMapper.NAME, "nested3")), Occur.FILTER)
.build();
assertEquals(expectedChildQuery, query.getChildQuery());

Expand Down
Loading

0 comments on commit 6b641d2

Please sign in to comment.