From 6218161cc5ec942304a7a4e90e7cfd2348cd5fd3 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Mon, 27 Feb 2023 22:37:54 -0800 Subject: [PATCH 01/25] Add FlatObject FieldMapper Signed-off-by: Mingshi Liu --- CHANGELOG.md | 1 + benchmarks/build.gradle | 7 + .../mapper/FlatObjectMappingBenchmark.java | 368 +++++++++ .../xcontent/JsonToStringXContentParser.java | 285 +++++++ .../index/mapper/FlatObjectFieldMapper.java | 769 ++++++++++++++++++ 5 files changed, 1430 insertions(+) create mode 100644 benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java create mode 100644 server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java create mode 100644 server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 886fad43a066b..e0942cd93d4a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Require MediaType in Strings.toString API ([#6009](https://github.com/opensearch-project/OpenSearch/pull/6009)) - [Refactor] XContent base classes from xcontent to core library ([#5902](https://github.com/opensearch-project/OpenSearch/pull/5902)) +- Introduce a new field type: flat-object (TODO: update the link!) ([#1018](https://github.com/opensearch-project/OpenSearch/issues/1018)) ### Deprecated diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index 4a7825e9ba35b..803d8777592c6 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -40,16 +40,23 @@ archivesBaseName = 'opensearch-benchmarks' test.enabled = false dependencies { +// implementation project(":dependencies:client:opensearch:opensearch-rest-high-level-client") api( project(":server")) { // JMH ships with the conflicting version 4.6. This prevents us from using jopt-simple in benchmarks (which should be ok) but allows // us to invoke the JMH uberjar as usual. 
exclude group: 'net.sf.jopt-simple', module: 'jopt-simple' } + api(project(":client:rest-high-level")) + api "org.apache.httpcomponents:httpcore:${versions.httpcore}" + api "org.apache.httpcomponents:httpclient:${versions.httpclient}" api "org.openjdk.jmh:jmh-core:$versions.jmh" + implementation 'org.json:json:20210307' +// implementation 'org.apache.httpcomponents:httpcore5:5.1.3' annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" // Dependencies of JMH runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4' runtimeOnly 'org.apache.commons:commons-math3:3.6.1' + } // enable the JMH's BenchmarkProcessor to generate the final benchmark classes diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java new file mode 100644 index 0000000000000..c3000eba60993 --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java @@ -0,0 +1,368 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.benchmark.index.mapper; + +import org.apache.hc.core5.http.HttpHost; +import org.json.JSONObject; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.refresh.RefreshRequest; +import org.opensearch.action.admin.indices.refresh.RefreshResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.benchmark.index.mapper.FlatObjectMappingBenchmark.MyState; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestClient; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.client.indices.CreateIndexRequest; +import org.opensearch.client.indices.CreateIndexResponse; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.SearchHits; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.opensearch.search.sort.SortOrder; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Iterator; 
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * JMH benchmarks that compare an index using default dynamic mapping with an index that maps
+ * one field as {@code flat-object}. Each benchmark creates an index, optionally indexes and
+ * searches one document, and deletes the index again.
+ *
+ * All requests go through a {@link RestHighLevelClient}; the cluster address is read from the
+ * {@code opensearch.uri} system property (default {@code http://localhost:9200}).
+ */
+@State(Scope.Thread)
+@Fork(1)
+@Warmup(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 100, time = 1, timeUnit = TimeUnit.SECONDS)
+
+public class FlatObjectMappingBenchmark {
+
+    /** Sample syslog document shared by the indexing and search benchmarks. */
+    private static final String SAMPLE_DOC =
+        "{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
+
+    /**
+     * Per-thread benchmark state holding the REST client, opened once per trial.
+     */
+    @State(Scope.Thread)
+    public static class MyState {
+        private RestHighLevelClient client;
+
+        /**
+         * Builds the client from the {@code opensearch.uri} system property.
+         *
+         * @throws IllegalArgumentException if the property is set to a blank value
+         */
+        @Setup(Level.Trial)
+        public void setup() throws Exception {
+            String httpUri = System.getProperty("opensearch.uri", "http://localhost:9200");
+            if (httpUri == null || httpUri.trim().isEmpty()) {
+                throw new IllegalArgumentException("opensearch.uri system property not set");
+            }
+
+            this.client = new RestHighLevelClient(RestClient.builder(HttpHost.create(httpUri)));
+
+        }
+
+        /** Closes the client at the end of the trial. */
+        @TearDown(Level.Trial)
+        public void tearDown() throws Exception {
+            this.client.close();
+        }
+    }
+
+    /**
+     * DynamicIndex:
+     * create index and delete index
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void CreateDynamicIndex(MyState state) throws IOException, URISyntaxException {
+        GetDynamicIndex(state, "demo-dynamic-test");
+        DeleteIndex(state, "demo-dynamic-test");
+    }
+
+    /**
+     * FlatObjectIndex:
+     * create index and delete index
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void CreateFlatObjectIndex(MyState state) throws IOException, URISyntaxException {
+        GetFlatObjectIndex(state, "demo-flat-object-test", "host");
+        DeleteIndex(state, "demo-flat-object-test");
+    }
+
+    /**
+     * DynamicIndex:
+     * create index, upload one document and delete index
+     */
+
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void indexDynamicMapping(MyState state) throws IOException, URISyntaxException {
+        GetDynamicIndex(state, "demo-dynamic-test1");
+        UploadDoc(state, "demo-dynamic-test1", SAMPLE_DOC);
+        DeleteIndex(state, "demo-dynamic-test1");
+    }
+
+    /**
+     * FlatObjectIndex:
+     * create index, upload one document and delete index
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void indexFlatObjectMapping(MyState state) throws IOException, URISyntaxException {
+        GetFlatObjectIndex(state, "demo-flat-object-test1", "host");
+        UploadDoc(state, "demo-flat-object-test1", SAMPLE_DOC);
+        DeleteIndex(state, "demo-flat-object-test1");
+    }
+
+    /**
+     * DynamicIndex:
+     * create index, upload one document, search for document and delete index
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void searchDynamicMapping(MyState state) throws IOException {
+        String indexName = "demo-dynamic-test2";
+        GetDynamicIndex(state, indexName);
+        UploadDoc(state, indexName, SAMPLE_DOC);
+        SearchDoc(state, indexName, "host.hostname", "bionic", "@timestamp", "message");
+        DeleteIndex(state, indexName);
+    }
+
+    /**
+     * FlatObjectIndex:
+     * create index, upload one document, search for document and delete index
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void searchFlatObjectMapping(MyState state) throws IOException {
+        GetFlatObjectIndex(state, "demo-flat-object-test2", "host");
+        UploadDoc(state, "demo-flat-object-test2", SAMPLE_DOC);
+        SearchDoc(state, "demo-flat-object-test2", "host", "name", "@timestamp", "message");
+        DeleteIndex(state, "demo-flat-object-test2");
+    }
+
+    /*
+     * DynamicIndex (disabled):
+     * create index, upload a document with 100 sub-objects of 10 fields each,
+     * search for document and delete index.
+     * Kept commented out because it caught exceptions once the number of fields went over 1000.
+     */
+    // @Benchmark
+    // @BenchmarkMode(Mode.AverageTime)
+    // @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    // public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException {
+    //
+    // String indexName = "demo-dynamic-test3";
+    // GetDynamicIndex(state, indexName);
+    // String doc = GenerateRandomJson();
+    // Map<String, String> searchValueAndPath = findNestedValueAndPath(doc,99, "field0");
+    // String searchValue = searchValueAndPath.get("value");
+    // String searchFieldName = searchValueAndPath.get("path");
+    // UploadDoc(state, indexName, doc);
+    // SearchDoc(state,indexName,searchFieldName,searchValue,searchValue ,searchFieldName );
+    // DeleteIndex(state, indexName);
+    // }
+
+    /**
+     * FlatObjectIndex:
+     * create index, upload a document whose "nested0" object holds 100 sub-objects of 10 fields each,
+     * search the "nested0._value" sub field for one of the generated values and delete index
+     * works fine and able to return document
+     */
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public void searchFlatObjectMappingInValueWithOneHundredNestedJSON(MyState state) throws IOException {
+        String indexName = "demo-flat-object-test4";
+        GetFlatObjectIndex(state, indexName, "nested0");
+        String doc = GenerateRandomJson(100, "nested");
+        Map<String, String> searchValueAndPath = findNestedValueAndPath(doc, 6, "nested0");
+        String searchRandomWord = searchValueAndPath.get("value");
+        String searchRandomPath = "nested0._value";
+        String searchFieldName = "nested0";
+        UploadDoc(state, indexName, doc);
+        SearchDoc(state, indexName, searchRandomPath, searchRandomWord, searchFieldName, searchFieldName);
+        DeleteIndex(state, indexName);
+    }
+
+    /** Creates an index with no explicit mapping and reports an unacknowledged response on stdout. */
+    private static void GetDynamicIndex(MyState state, String indexName) throws IOException {
+        CreateIndexRequest dynamicRequest = new CreateIndexRequest(indexName);
+        CreateIndexResponse dynamicResponse = state.client.indices().create(dynamicRequest, RequestOptions.DEFAULT);
+        if (!dynamicResponse.isAcknowledged()) {
+            System.out.println("Failed to create index");
+        }
+    }
+
+    /**
+     * Creates an index whose mapping declares {@code flatObjectFieldName} with type
+     * {@code flat-object} and reports an unacknowledged response on stdout.
+     */
+    private static void GetFlatObjectIndex(MyState state, String indexName, String flatObjectFieldName) throws IOException {
+        CreateIndexRequest flatRequest = new CreateIndexRequest(indexName).mapping(
+            XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject("properties")
+                .startObject(flatObjectFieldName)
+                .field("type", "flat-object")
+                .endObject()
+                .endObject()
+                .endObject()
+        );
+
+        CreateIndexResponse flatResponse = state.client.indices().create(flatRequest, RequestOptions.DEFAULT);
+
+        if (!flatResponse.isAcknowledged()) {
+            System.out.println("Failed to create index");
+        }
+    }
+
+    /** Deletes the index and reports an unacknowledged response on stdout. */
+    private static void DeleteIndex(MyState state, String indexName) throws IOException {
+        DeleteIndexRequest dynamicDeleteRequest = new DeleteIndexRequest(indexName);
+        AcknowledgedResponse dynamicDeleteResponse = state.client.indices().delete(dynamicDeleteRequest, RequestOptions.DEFAULT);
+        if (!dynamicDeleteResponse.isAcknowledged()) {
+            System.out.println("Failed to delete index");
+        }
+    }
+
+    /** Indexes one JSON document and prints the response status when it is not CREATED. */
+    private static void UploadDoc(MyState state, String indexName, String doc) throws IOException {
+        IndexRequest request = new IndexRequest(indexName);
+        request.source(doc, XContentType.JSON);
+        IndexResponse indexResponse = state.client.index(request, RequestOptions.DEFAULT);
+        if (!indexResponse.status().toString().equals("CREATED")) {
+            System.out.println("Index status is " + indexResponse.status());
+        }
+    }
+
+    /**
+     * Refreshes the index, then runs a match query and fails when nothing is found.
+     *
+     * @param searchFieldName    field the match query targets
+     * @param searchText         text to match
+     * @param sortFieldName      field used for descending sort
+     * @param highlightFieldName field passed to the highlighter
+     * @throws IOException if the request fails or the search returns no hit
+     */
+    private static void SearchDoc(
+        MyState state,
+        String indexName,
+        String searchFieldName,
+        String searchText,
+        String sortFieldName,
+        String highlightFieldName
+    ) throws IOException {
+        // Refresh the index before searching
+        RefreshRequest refreshRequest = new RefreshRequest(indexName);
+        RefreshResponse refreshResponse = state.client.indices().refresh(refreshRequest, RequestOptions.DEFAULT);
+        if (!refreshResponse.getStatus().toString().equals("OK")) {
+            System.out.println("refreshResponse: " + refreshResponse.getStatus());
+        }
+
+        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
+        sourceBuilder.query(QueryBuilders.matchQuery(searchFieldName, searchText));
+        sourceBuilder.from(0);
+        sourceBuilder.size(10);
+        sourceBuilder.sort(sortFieldName, SortOrder.DESC);
+        sourceBuilder.highlighter(new HighlightBuilder().field(highlightFieldName));
+        SearchRequest searchRequest = new SearchRequest(indexName);
+        searchRequest.source(sourceBuilder);
+        // Lower-case local name so the variable no longer shadows the SearchResponse type.
+        SearchResponse searchResponse = state.client.search(searchRequest, RequestOptions.DEFAULT);
+        if (!searchResponse.status().toString().equals("OK")) {
+            System.out.println("the number of hit is: " + searchResponse.getHits().getTotalHits().value);
+            System.out.println("SearchResponse: " + searchResponse.toString());
+        }
+
+        SearchHits hits = searchResponse.getHits();
+        long totalHits = hits.getTotalHits().value;
+        if (totalHits == 0) {
+            throw new IOException("No hit is found");
+        }
+    }
+
+    /**
+     * Builds a random JSON document of the form
+     * {@code { "<subObjectName>0": { "<subObjectName>0": {...}, "<subObjectName>1": {...}, ... } }}.
+     * The inner objects are siblings (not nested inside each other) and each holds
+     * {@code field0} .. {@code field9} with random string values.
+     *
+     * @param numberOfNestedLevel number of sibling sub-objects to generate
+     * @param subObjectName       prefix used for the sub-object names
+     * @return the document serialized as a JSON string
+     */
+    private static String GenerateRandomJson(int numberOfNestedLevel, String subObjectName) {
+        JSONObject json = new JSONObject();
+        Random random = new Random();
+
+        // Create numberOfNestedLevel sibling sub-objects
+
+        for (int i = 0; i < numberOfNestedLevel; i++) {
+            JSONObject nestedObject = new JSONObject();
+
+            // Add 10 fields to each sub-object
+            for (int j = 0; j < 10; j++) {
+                String field = "field" + j;
+                String value = generateRandomString(random);
+                nestedObject.put(field, value);
+            }
+
+            // Add the sub-object to the parent object
+            String nestedObjectName = subObjectName + i;
+            json.put(nestedObjectName, nestedObject);
+        }
+
+        // Wrap everything in one top-level object and return the JSON document as a string
+        JSONObject returnJson = new JSONObject();
+        returnJson.put(subObjectName + "0", json);
+        return returnJson.toString();
+    }
+
+    /**
+     * Returns a random string made of 10 (lower-case letter, digit) pairs, i.e. 20 characters.
+     */
+    private static String generateRandomString(Random random) {
+        String alphabet = "abcdefghijklmnopqrstuvwxyz";
+        int length = 10;
+        StringBuilder randomString = new StringBuilder();
+        for (int i = 0; i < length; i++) {
+            randomString.append(alphabet.charAt(random.nextInt(alphabet.length())));
+            randomString.append(random.nextInt(10));
+        }
+        return randomString.toString();
+    }
+
+    /**
+     * Searches the JSON document depth-first for the first key named {@code "field" + levelNumber}.
+     *
+     * @param randomJsonString JSON object to search, as a string
+     * @param levelNumber      numeric suffix of the field name to look for
+     * @param currentPath      dotted path of the object being searched; only used for the printed trace
+     * @return a map with {@code "value"} (the field's value) and {@code "path"} (the matched key),
+     *         or an empty map when the key does not occur
+     */
+    private static Map<String, String> findNestedValueAndPath(String randomJsonString, int levelNumber, String currentPath) {
+        JSONObject jsonObject = new JSONObject(randomJsonString);
+        String targetKey = "field" + levelNumber;
+        Map<String, String> result = new HashMap<>();
+        Iterator<String> keys = jsonObject.keys();
+        while (keys.hasNext()) {
+            String key = keys.next();
+
+            Object value = jsonObject.get(key);
+            if (key.equals(targetKey)) {
+                result.put("value", value.toString());
+                result.put("path", key);
+                System.out.println("value is " + value.toString());
+                System.out.println("path is " + currentPath);
+                break;
+            }
+            if (value instanceof JSONObject) {
+                // Build the child path per sub-object; appending to one shared builder made the
+                // path accumulate the names of every sibling visited before the match.
+                String childPath = currentPath + "." + key;
+
+                Map<String, String> nestedResult = findNestedValueAndPath(value.toString(), levelNumber, childPath);
+                if (!nestedResult.isEmpty()) {
+                    return nestedResult;
+                }
+            }
+        }
+        return result;
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java
new file mode 100644
index 0000000000000..755bf509ab9f5
--- /dev/null
+++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java
@@ -0,0 +1,285 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Modifications Copyright OpenSearch Contributors.
See
+ * GitHub history for details.
+ */
+
+package org.opensearch.common.xcontent;
+
+import org.opensearch.common.bytes.BytesReference;
+import org.opensearch.common.xcontent.json.JsonXContent;
+import org.opensearch.common.xcontent.support.AbstractXContentParser;
+import org.opensearch.index.mapper.ParseContext;
+import java.io.IOException;
+import java.nio.CharBuffer;
+import java.util.ArrayList;
+import java.util.logging.Logger;
+
+/**
+ * JsonToStringXContentParser is the main parser class to transform JSON into string fields in a XContentParser.
+ * {@link #parseObject()} returns an XContentParser with 3 string fields:
+ * fieldName, fieldName._value, fieldName._valueAndPath
+ * All other XContentParser methods delegate to the parser taken from the ParseContext.
+ * @opensearch.internal
+ */
+public class JsonToStringXContentParser extends AbstractXContentParser {
+    private final String fieldTypeName;
+    private XContentParser parser;
+
+    // Collected while walking the object: values, "path=value" pairs and keys, in visit order.
+    private ArrayList<String> valueList = new ArrayList<>();
+    private ArrayList<String> valueAndPathList = new ArrayList<>();
+    private ArrayList<String> keyList = new ArrayList<>();
+
+    private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
+    private ParseContext parseContext;
+
+    private NamedXContentRegistry xContentRegistry;
+
+    private DeprecationHandler deprecationHandler;
+    /**
+     * Temporary debug logger.
+     * To be removed after the draft PR.
+     */
+
+    private static final Logger logger = Logger.getLogger((JsonToStringXContentParser.class.getName()));
+
+    /**
+     * @param xContentRegistry   registry used for the parser created by {@link #parseObject()}
+     * @param deprecationHandler deprecation handler used for that parser
+     * @param parseContext       context whose parser supplies the JSON tokens to flatten
+     * @param fieldTypeName      name of the flat-object field; used as the output field name and path root
+     */
+    public JsonToStringXContentParser(
+        NamedXContentRegistry xContentRegistry,
+        DeprecationHandler deprecationHandler,
+        ParseContext parseContext,
+        String fieldTypeName
+    ) throws IOException {
+        super(xContentRegistry, deprecationHandler);
+        this.parseContext = parseContext;
+        this.deprecationHandler = deprecationHandler;
+        this.xContentRegistry = xContentRegistry;
+        this.parser = parseContext.parser();
+        this.fieldTypeName = fieldTypeName;
+    }
+
+    /**
+     * Consumes the current JSON object from the wrapped parser and re-emits it as a JSON object
+     * with three array fields: the keys, the values, and the "path=value" pairs.
+     *
+     * @return a new JSON parser positioned before the generated object
+     * @throws IOException if reading fails or an unsupported token is encountered
+     */
+    public XContentParser parseObject() throws IOException {
+        builder.startObject();
+        parseToken(this.fieldTypeName);
+        builder.field(this.fieldTypeName, keyList);
+        builder.field(this.fieldTypeName + "._value", valueList);
+        builder.field(this.fieldTypeName + "._valueAndPath", valueAndPathList);
+        builder.endObject();
+        String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON);
+        logger.info("Before createParser, jString: " + jString + "\n");
+        return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString));
+    }
+
+    /**
+     * Walks the fields of the current object until its END_OBJECT token, recording every key,
+     * value and "path=value" pair, and recursing into sub-objects.
+     *
+     * @param parentPath dotted path of the object being walked; child paths are parentPath + "." + key
+     */
+    private void parseToken(String parentPath) throws IOException {
+        String currentFieldName;
+        while (this.parser.nextToken() != Token.END_OBJECT) {
+
+            currentFieldName = this.parser.currentName();
+
+            logger.info("currentFieldName: " + currentFieldName + "\n");
+            StringBuilder parsedFields = new StringBuilder();
+            // Start from the enclosing object's path so nested keys keep their full dotted path.
+            StringBuilder path = new StringBuilder(parentPath);
+            if (this.parser.nextToken() == Token.START_OBJECT) {
+                /*
+                 * For a nested JSON object, record its key and then recurse into it.
+                 * For example, a flat-object field "grandpa" with the value
+                 * { "dad": {
+                 *     "son": "me"
+                 * } }
+                 * is read as three string fields:
+                 * grandpa: ["dad", "son"] -- the parent string field contains the keys only.
+                 * grandpa._value: [<dad object>, "me"] -- the _value sub string field contains the values only.
+                 * grandpa._valueAndPath: ["grandpa.dad=<dad object>", "grandpa.dad.son=me"]
+                 * -- the _valueAndPath sub string field contains the "path=value" format.
+                 * <dad object> stands for the string form of the nested object, see the TODO below.
+                 */
+                // TODO: to convert the entire JsonObject as string without changing the tokenizer position.
+                // NOTE(review): parser.toString() is a placeholder and is presumably not the JSON text of the object.
+                path.append("." + currentFieldName);
+                parsedFields.append(this.parser.toString());
+                this.keyList.add(currentFieldName);
+                this.valueList.add(parsedFields.toString());
+                this.valueAndPathList.add(path + "=" + parsedFields.toString());
+                parseToken(path.toString());
+            } else {
+                path.append("." + currentFieldName);
+                parseValue(currentFieldName, parsedFields);
+                this.keyList.add(currentFieldName);
+                this.valueList.add(parsedFields.toString());
+                this.valueAndPathList.add(path + "=" + parsedFields.toString());
+            }
+
+        }
+    }
+
+    /**
+     * Appends the text of the current value token to {@code parsedFields}.
+     * Only string values are appended; FIELD_NAME and VALUE_EMBEDDED_OBJECT tokens are logged and
+     * leave {@code parsedFields} unchanged.
+     *
+     * @throws IOException for START_OBJECT and every other token type
+     */
+    private void parseValue(String currentFieldName, StringBuilder parsedFields) throws IOException {
+        logger.info("this.parser.currentToken(): " + this.parser.currentToken() + "\n");
+        switch (this.parser.currentToken()) {
+            case VALUE_STRING:
+                parsedFields.append(this.parser.textOrNull());
+                logger.info("currentFieldName and parsedFields :" + currentFieldName + " " + parsedFields.toString() + "\n");
+                break;
+            // Handle other token types as needed
+            // ToDo, what do we do, if encountered these fields?
+            // should never get to START_OBJECT
+            case START_OBJECT:
+                throw new IOException("Unsupported token type");
+            case FIELD_NAME:
+                // should never get to FIELD_NAME
+                logger.info("token is FIELD_NAME: " + this.parser.currentName() + "\n");
+                break;
+            case VALUE_EMBEDDED_OBJECT:
+                logger.info("token is VALUE_EMBEDDED_OBJECT: " + this.parser.objectText() + "\n");
+                break;
+            default:
+                throw new IOException("Unsupported token type [" + parser.currentToken() + "]");
+        }
+    }
+
+    @Override
+    public XContentType contentType() {
+        return XContentType.JSON;
+    }
+
+    // Everything below delegates to the wrapped parser.
+
+    @Override
+    public Token nextToken() throws IOException {
+        return this.parser.nextToken();
+    }
+
+    @Override
+    public void skipChildren() throws IOException {
+        this.parser.skipChildren();
+    }
+
+    @Override
+    public Token currentToken() {
+        return this.parser.currentToken();
+    }
+
+    @Override
+    public String currentName() throws IOException {
+        return this.parser.currentName();
+    }
+
+    @Override
+    public String text() throws IOException {
+        return this.parser.text();
+    }
+
+    @Override
+    public CharBuffer charBuffer() throws IOException {
+        return this.parser.charBuffer();
+    }
+
+    @Override
+    public Object objectText() throws IOException {
+        return this.parser.objectText();
+    }
+
+    @Override
+    public Object objectBytes() throws IOException {
+        return this.parser.objectBytes();
+    }
+
+    @Override
+    public boolean hasTextCharacters() {
+        return this.parser.hasTextCharacters();
+    }
+
+    @Override
+    public char[] textCharacters() throws IOException {
+        return this.parser.textCharacters();
+    }
+
+    @Override
+    public int textLength() throws IOException {
+        return this.parser.textLength();
+    }
+
+    @Override
+    public int textOffset() throws IOException {
+        return this.parser.textOffset();
+    }
+
+    @Override
+    public Number numberValue() throws IOException {
+        return this.parser.numberValue();
+    }
+
+    @Override
+    public NumberType numberType() throws IOException {
+        return this.parser.numberType();
+    }
+
+    @Override
+    public byte[] binaryValue() throws IOException {
+        return this.parser.binaryValue();
+    }
+
+    @Override
+    public XContentLocation getTokenLocation() {
+        return this.parser.getTokenLocation();
+    }
+
+    @Override
+    protected boolean doBooleanValue() throws IOException {
+        return this.parser.booleanValue();
+    }
+
+    @Override
+    protected short doShortValue() throws IOException {
+        return this.parser.shortValue();
+    }
+
+    @Override
+    protected int doIntValue() throws IOException {
+        return this.parser.intValue();
+    }
+
+    @Override
+    protected long doLongValue() throws IOException {
+        return this.parser.longValue();
+    }
+
+    @Override
+    protected float doFloatValue() throws IOException {
+        return this.parser.floatValue();
+    }
+
+    @Override
+    protected double doDoubleValue() throws IOException {
+        return this.parser.doubleValue();
+    }
+
+    @Override
+    public boolean isClosed() {
+        return this.parser.isClosed();
+    }
+
+    @Override
+    public void close() throws IOException {
+        this.parser.close();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java
new file mode 100644
index 0000000000000..3636567e815b2
--- /dev/null
+++
b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -0,0 +1,769 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.Nullable; +import org.opensearch.common.collect.Iterators; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.xcontent.DeprecationHandler; +import org.opensearch.common.xcontent.NamedXContentRegistry; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.similarity.SimilarityProvider; +import org.opensearch.search.aggregations.support.CoreValuesSourceType; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Supplier; + +import java.util.logging.Logger; + +/** + * A field 
mapper for flat-objects. This mapper accepts JSON object and treat as string fields in one index. + * @opensearch.internal + */ +public final class FlatObjectFieldMapper extends ParametrizedFieldMapper { + /** + * logging function: + * To remove after draft PR + */ + + private static final Logger logger = Logger.getLogger((FlatObjectFieldMapper.class.getName())); + + /** + * A flat-object mapping contains one parent field itself and two substring fields, + * field._valueAndPath and field._value + */ + + public static final String CONTENT_TYPE = "flat-object"; + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; + private static final String VALUE_SUFFIX = "._value"; + + /** + * Default parameters, similar to keyword + * In flat-object, three fields are treated as keyword fields with the same parameters + * Cannot be tokenized, can OmitNorms, and can setIndexOption. + * @opensearch.internal + */ + public static class Defaults { + public static final FieldType FIELD_TYPE = new FieldType(); + + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + } + + /** + * The flat-object field for the field mapper + * + * @opensearch.internal + */ + public static class FlatObjectField extends Field { + + public FlatObjectField(String field, BytesRef term, FieldType ft) { + super(field, term, ft); + } + + } + + private static FlatObjectFieldMapper toType(FieldMapper in) { + return (FlatObjectFieldMapper) in; + } + + /** + * The builder for the flat-object field mapper + * Set the same parameters from keywordFieldMapper.Builder + * @opensearch.internal + */ + public static class Builder extends ParametrizedFieldMapper.Builder { + + private final Parameter indexed = Parameter.indexParam(m -> toType(m).indexed, true); + + private final Parameter hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true); + private final Parameter stored = 
Parameter.storeParam(m -> toType(m).fieldType.stored(), false); + + private final Parameter nullValue = Parameter.stringParam("null_value", false, m -> toType(m).nullValue, null) + .acceptsNull(); + + private final Parameter eagerGlobalOrdinals = Parameter.boolParam( + "eager_global_ordinals", + true, + m -> toType(m).eagerGlobalOrdinals, + false + ); + private final Parameter ignoreAbove = Parameter.intParam( + "ignore_above", + true, + m -> toType(m).ignoreAbove, + Integer.MAX_VALUE + ); + + private final Parameter indexOptions = Parameter.restrictedStringParam( + "index_options", + false, + m -> toType(m).indexOptions, + "docs", + "freqs" + ); + private final Parameter hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false); + private final Parameter similarity = TextParams.similarity(m -> toType(m).similarity); + + private final Parameter normalizer = Parameter.stringParam("normalizer", false, m -> toType(m).normalizerName, "default"); + + private final Parameter splitQueriesOnWhitespace = Parameter.boolParam( + "split_queries_on_whitespace", + true, + m -> toType(m).splitQueriesOnWhitespace, + false + ); + + private final Parameter> meta = Parameter.metaParam(); + private final Parameter boost = Parameter.boostParam(); + private final IndexAnalyzers indexAnalyzers; + + public Builder(String name, IndexAnalyzers indexAnalyzers) { + super(name); + this.indexAnalyzers = indexAnalyzers; + } + + public Builder(String name) { + this(name, null); + } + + public Builder ignoreAbove(int ignoreAbove) { + this.ignoreAbove.setValue(ignoreAbove); + return this; + } + + Builder normalizer(String normalizerName) { + this.normalizer.setValue(normalizerName); + return this; + } + + Builder nullValue(String nullValue) { + this.nullValue.setValue(nullValue); + return this; + } + + public Builder docValues(boolean hasDocValues) { + this.hasDocValues.setValue(hasDocValues); + return this; + } + + public Builder index(boolean index) { + return this; + } + 
+ public Builder store(boolean store) { + this.stored.setValue(store); + return this; + } + + @Override + protected List> getParameters() { + return Arrays.asList( + indexed, + hasDocValues, + stored, + nullValue, + eagerGlobalOrdinals, + ignoreAbove, + indexOptions, + hasNorms, + similarity, + normalizer, + splitQueriesOnWhitespace, + boost, + meta + ); + } + + /** + * FlatObjectFieldType is the parent field type. the parent field enables KEYWORD_ANALYZER, + * allows normalizer and splitQueriesOnWhitespace + */ + private FlatObjectFieldType buildFlatObjectFieldType(BuilderContext context, FieldType fieldType) { + NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; + NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; + String normalizerName = this.normalizer.getValue(); + if (Objects.equals(normalizerName, "default") == false) { + assert indexAnalyzers != null; + normalizer = indexAnalyzers.getNormalizer(normalizerName); + if (normalizer == null) { + throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]"); + } + if (splitQueriesOnWhitespace.getValue()) { + searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName); + } else { + searchAnalyzer = normalizer; + } + } else if (splitQueriesOnWhitespace.getValue()) { + searchAnalyzer = Lucene.WHITESPACE_ANALYZER; + } + return new FlatObjectFieldType(buildFullName(context), fieldType, normalizer, searchAnalyzer, this); + } + + /** + * ValueFieldMapper is the sub field type for values in the Json. 
+ * use a keywordfieldtype + */ + private ValueFieldMapper buildValueFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { + String fullName = buildFullName(context); + FieldType vft = new FieldType(fieldType); + vft.setOmitNorms(this.hasNorms.getValue() == false); + KeywordFieldMapper.KeywordFieldType valueFieldType = new KeywordFieldMapper.KeywordFieldType(fullName + "._value", vft); + // TODO: revisit analyzer object + fft.setValueFieldType(valueFieldType); + return new ValueFieldMapper(vft, valueFieldType); + + } + + /** + * ValueAndPathFieldMapper is the sub field type for path=value format in the Json. + * also use a keywordfieldtype + */ + private ValueAndPathFieldMapper buildValueAndPathFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { + + String fullName = buildFullName(context); + FieldType vft = new FieldType(fieldType); + vft.setOmitNorms(this.hasNorms.getValue() == false); + KeywordFieldMapper.KeywordFieldType ValueAndPathFieldType = new KeywordFieldMapper.KeywordFieldType( + fullName + "._valueAndPath", + vft + ); + // TODO: revisit analyzer object + fft.setValueAndPathFieldType(ValueAndPathFieldType); + return new ValueAndPathFieldMapper(vft, ValueAndPathFieldType); + } + + /** + * FlatObjectFieldMapper builds the FLatObjectFieldMapper itself, and also build the two sub fieldMappers: + * ValueFieldMapper and ValueAndPathFieldMapper + */ + @Override + public FlatObjectFieldMapper build(BuilderContext context) { + FieldType fieldtype = new FieldType(Defaults.FIELD_TYPE); + fieldtype.setOmitNorms(this.hasNorms.getValue() == false); + fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue())); + fieldtype.setStored(this.stored.getValue()); + FlatObjectFieldType fft = buildFlatObjectFieldType(context, fieldtype); + return new FlatObjectFieldMapper( + name, + fieldtype, + fft, + buildValueFieldMapper(context, fieldtype, fft), + 
buildValueAndPathFieldMapper(context, fieldtype, fft), + multiFieldsBuilder.build(this, context), + copyTo.build(), + this + ); + } + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); + + /** + * Field type for flat-object fields + * one flat-object fields contains its own fieldType, one valueFieldType and one valueAndPathFieldType + * @opensearch.internal + */ + public static final class FlatObjectFieldType extends StringFieldType { + + private final int ignoreAbove; + private final String nullValue; + + private KeywordFieldMapper.KeywordFieldType valueFieldType; + + private KeywordFieldMapper.KeywordFieldType valueAndPathFieldType; + + public FlatObjectFieldType( + String name, + FieldType fieldType, + NamedAnalyzer normalizer, + NamedAnalyzer searchAnalyzer, + Builder builder + ) { + super( + name, + fieldType.indexOptions() != IndexOptions.NONE, + fieldType.stored(), + builder.hasDocValues.getValue(), + new TextSearchInfo(fieldType, builder.similarity.getValue(), searchAnalyzer, searchAnalyzer), + builder.meta.getValue() + ); + setEagerGlobalOrdinals(builder.eagerGlobalOrdinals.getValue()); + setIndexAnalyzer(normalizer); + setBoost(builder.boost.getValue()); + this.ignoreAbove = builder.ignoreAbove.getValue(); + this.nullValue = builder.nullValue.getValue(); + } + + public FlatObjectFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { + super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + public FlatObjectFieldType(String name) { + this(name, true, true, Collections.emptyMap()); + } + + public FlatObjectFieldType(String name, FieldType fieldType) { + super( + name, + fieldType.indexOptions() != IndexOptions.NONE, + false, + false, + new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + 
Collections.emptyMap() + ); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + public FlatObjectFieldType(String name, NamedAnalyzer analyzer) { + super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap()); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + void setValueFieldType(KeywordFieldMapper.KeywordFieldType valueFieldType) { + this.valueFieldType = valueFieldType; + } + + void setValueAndPathFieldType(KeywordFieldMapper.KeywordFieldType ValueAndPathFieldType) { + this.valueAndPathFieldType = ValueAndPathFieldType; + } + + public KeywordFieldMapper.KeywordFieldType getValueFieldType() { + return this.valueFieldType; + } + + public KeywordFieldMapper.KeywordFieldType getValueAndPathFieldType() { + return this.valueAndPathFieldType; + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + NamedAnalyzer normalizer() { + return indexAnalyzer(); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { + failIfNoDocValues(); + return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + + return new SourceValueFetcher(name(), context, nullValue) { + @Override + protected String parseSourceValue(Object value) { + String flatObjectKeywordValue = value.toString(); + + if (flatObjectKeywordValue.length() > ignoreAbove) { + return null; + } + + NamedAnalyzer normalizer = normalizer(); + if (normalizer == null) { + return flatObjectKeywordValue; + } + + try { + return normalizeValue(normalizer, name(), flatObjectKeywordValue); + } catch (IOException e) { + throw new 
UncheckedIOException(e); + } + } + }; + } + + @Override + public Object valueForDisplay(Object value) { + if (value == null) { + return null; + } + // flat-objects are internally stored as utf8 bytes + BytesRef binaryValue = (BytesRef) value; + return binaryValue.utf8ToString(); + } + + @Override + protected BytesRef indexedValueForSearch(Object value) { + if (getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) { + // flat-object analyzer with the default attribute source which encodes terms using UTF8 + // in that case we skip normalization, which may be slow if there many terms need to + // parse (eg. large terms query) since Analyzer.normalize involves things like creating + // attributes through reflection + // This if statement will be used whenever a normalizer is NOT configured + return super.indexedValueForSearch(value); + } + + if (value == null) { + return null; + } + if (value instanceof BytesRef) { + value = ((BytesRef) value).utf8ToString(); + } + return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); + } + + @Override + public Query wildcardQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitve, + QueryShardContext context + ) { + // flat-object field types are always normalized, so ignore case sensitivity and force normalize the wildcard + // query text + return super.wildcardQuery(value, method, caseInsensitve, true, context); + } + + } + + private final boolean indexed; + private final boolean hasDocValues; + private final String nullValue; + private final boolean eagerGlobalOrdinals; + private final int ignoreAbove; + private final String indexOptions; + private final FieldType fieldType; + private final SimilarityProvider similarity; + private final String normalizerName; + private final boolean splitQueriesOnWhitespace; + private final ValueFieldMapper valueFieldMapper; + private final ValueAndPathFieldMapper valueAndPathFieldMapper; + + private final 
IndexAnalyzers indexAnalyzers; + + protected FlatObjectFieldMapper( + String simpleName, + FieldType fieldType, + FlatObjectFieldType mappedFieldType, + ValueFieldMapper valueFieldMapper, + ValueAndPathFieldMapper valueAndPathFieldMapper, + MultiFields multiFields, + CopyTo copyTo, + Builder builder + ) { + super(simpleName, mappedFieldType, multiFields, copyTo); + assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; + this.indexed = builder.indexed.getValue(); + this.hasDocValues = builder.hasDocValues.getValue(); + this.nullValue = builder.nullValue.getValue(); + this.eagerGlobalOrdinals = builder.eagerGlobalOrdinals.getValue(); + this.ignoreAbove = builder.ignoreAbove.getValue(); + this.indexOptions = builder.indexOptions.getValue(); + this.fieldType = fieldType; + this.similarity = builder.similarity.getValue(); + this.normalizerName = builder.normalizer.getValue(); + this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue(); + this.indexAnalyzers = builder.indexAnalyzers; + this.valueFieldMapper = valueFieldMapper; + this.valueAndPathFieldMapper = valueAndPathFieldMapper; + } + + /** + * TODO: Placeholder, this is used at keywordfieldmapper, considering to remove ignoreAbove + * Values that have more chars than the return value of this method will + * be skipped at parsing time. 
*/ + public int ignoreAbove() { + return ignoreAbove; + } + + @Override + protected FlatObjectFieldMapper clone() { + return (FlatObjectFieldMapper) super.clone(); + } + + @Override + public FlatObjectFieldType fieldType() { + return (FlatObjectFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + String value = null; + String fieldName = null; + + if (context.externalValueSet()) { + value = context.externalValue().toString(); + ParseValueAddFields(context, value, fieldType().name()); + } else { + JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + context, + fieldType().name() + ); + /** + * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + */ + XContentParser parser = JsonToStringParser.parseObject(); + + XContentParser.Token currentToken; + while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + switch (currentToken) { + case FIELD_NAME: + fieldName = parser.currentName(); + logger.info("fieldName: " + fieldName); + break; + case VALUE_STRING: + value = parser.textOrNull(); + logger.info("value: " + value); + ParseValueAddFields(context, value, fieldName); + break; + } + + } + + } + + } + + @Override + public Iterator iterator() { + List subIterators = new ArrayList<>(); + if (valueFieldMapper != null) { + subIterators.add(valueFieldMapper); + } + if (valueAndPathFieldMapper != null) { + subIterators.add(valueAndPathFieldMapper); + } + if (subIterators.size() == 0) { + return super.iterator(); + } + @SuppressWarnings("unchecked") + Iterator concat = Iterators.concat(super.iterator(), subIterators.iterator()); + return concat; + } + + private void ParseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { + if (value == null || value.length() > ignoreAbove) { + return; + } + + 
NamedAnalyzer normalizer = fieldType().normalizer(); + if (normalizer != null) { + value = normalizeValue(normalizer, name(), value); + } + + String[] valueTypeList = fieldName.split("\\._"); + String valueType = "._" + valueTypeList[valueTypeList.length - 1]; + logger.info("valueType: " + valueType); + /** + * the JsonToStringXContentParser returns XContentParser with 3 string fields + * fieldName, fieldName._value, fieldName._valueAndPath + */ + + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + logger.info("FlatObjectField name is " + fieldType().name()); + logger.info("FlatObjectField value is " + value); + // convert to utf8 only once before feeding postings/dv/stored fields + + final BytesRef binaryValue = new BytesRef(value); + Field field = new FlatObjectField(fieldType().name(), binaryValue, fieldType); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + /** + * Indentified by the stringfield name, + * fieldName will be store through the parent FlatFieldMapper,which contains all the keys + * fieldName._value will be store through the valueFieldMapper, which contains the values of the Json Object + * fieldName._valueAndPath will be store through the valueAndPathFieldMapper, which contains the values of + * the Json Object. + */ + if (fieldName.equals(fieldType().name())) { + context.doc().add(field); + } + if (valueType.equals(VALUE_SUFFIX)) { + if (valueFieldMapper != null) { + logger.info("valueFieldMapper value is " + value); + valueFieldMapper.addField(context, value); + } + } + if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { + if (valueAndPathFieldMapper != null) { + logger.info("valueAndPathFieldMapper value is " + value); + valueAndPathFieldMapper.addField(context, value); + } + } + + // TODo: to revisit if flat-object needs docValues. 
+ if (fieldType().hasDocValues()) { + if (context.doc().getField(fieldType().name()) == null || !context.doc().getFields(fieldType().name()).equals(field)) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } + } + + } + + } + + private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException { + + try (TokenStream ts = normalizer.tokenStream(field, value)) { + final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + if (ts.incrementToken() == false) { + throw new IllegalStateException( + "The normalization token stream is " + + "expected to produce exactly 1 token, but got 0 for analyzer " + + normalizer + + " and input \"" + + value + + "\"" + ); + } + final String newValue = termAtt.toString(); + if (ts.incrementToken()) { + throw new IllegalStateException( + "The normalization token stream is " + + "expected to produce exactly 1 token, but got 2+ for analyzer " + + normalizer + + " and input \"" + + value + + "\"" + ); + } + ts.end(); + return newValue; + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + public ParametrizedFieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName(), indexAnalyzers).init(this); + } + + // TODO Further simplify the code by new KeyWordFieldMapper to be ValueAndPathFieldMapper and ValueFieldMapper + private static final class ValueAndPathFieldMapper extends FieldMapper { + + protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { + super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); + } + + void addField(ParseContext context, String value) { + // context.doc().add(new Field(fieldType().name(), value, fieldType)); + final BytesRef binaryValue = new BytesRef(value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new 
KeywordFieldMapper.KeywordField(fieldType().name(), binaryValue, fieldType); + // Field field = new (fieldType().name()+VALUE_AND_PATH_SUFFIX, binaryValue, fieldType); + + context.doc().add(field); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + } + } + + @Override + protected void parseCreateField(ParseContext context) { + throw new UnsupportedOperationException(); + } + + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + + @Override + protected String contentType() { + return "valueAndPath"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + } + + private static final class ValueFieldMapper extends FieldMapper { + + protected ValueFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { + super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); + } + + void addField(ParseContext context, String value) { + final BytesRef binaryValue = new BytesRef(value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new KeywordFieldMapper.KeywordField(fieldType().name(), binaryValue, fieldType); + context.doc().add(field); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + } + } + + @Override + protected void parseCreateField(ParseContext context) { + throw new UnsupportedOperationException(); + } + + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + + @Override + protected String contentType() { + return "value"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + } + +} From 42f730afe8141caaa5a77782a4ca1f93af921489 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Mon, 6 Mar 2023 20:05:27 -0800 Subject: [PATCH 02/25] resolve import package for HttpHost Signed-off-by: Mingshi Liu --- 
.../benchmark/index/mapper/FlatObjectMappingBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java index c3000eba60993..1bf310b28563a 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java @@ -8,7 +8,7 @@ package org.opensearch.benchmark.index.mapper; -import org.apache.hc.core5.http.HttpHost; +import org.apache.http.HttpHost; import org.json.JSONObject; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; From 5932bc7c75e3e6b71ecc7cd8e3f19cf057dcd33a Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Mon, 20 Mar 2023 23:29:41 -0700 Subject: [PATCH 03/25] Dynamic Create FlatObjectFieldType for dotpath field Signed-off-by: Mingshi Liu --- .../mapper/FlatObjectMappingBenchmark.java | 111 ++--- .../xcontent/JsonToStringXContentParser.java | 47 +- .../index/mapper/FlatObjectFieldMapper.java | 451 +++++++++--------- .../org/opensearch/indices/IndicesModule.java | 2 + .../mapper/FlatObjectFieldMapperTests.java | 113 +++++ 5 files changed, 397 insertions(+), 327 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java index 1bf310b28563a..32250edf0b0c6 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java @@ -41,8 +41,6 @@ import 
org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHits; import org.opensearch.search.builder.SearchSourceBuilder; -import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; -import org.opensearch.search.sort.SortOrder; import java.io.IOException; import java.net.URISyntaxException; @@ -54,8 +52,8 @@ @State(Scope.Thread) @Fork(1) -@Warmup(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 100, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) public class FlatObjectMappingBenchmark { @@ -87,7 +85,7 @@ public void tearDown() throws Exception { @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void CreateDynamicIndex(MyState state) throws IOException, URISyntaxException { + public void CreateDynamicIndex(MyState state) throws IOException { GetDynamicIndex(state, "demo-dynamic-test"); DeleteIndex(state, "demo-dynamic-test"); } @@ -99,7 +97,7 @@ public void CreateDynamicIndex(MyState state) throws IOException, URISyntaxExcep @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void CreateFlatObjectIndex(MyState state) throws IOException, URISyntaxException { + public void CreateFlatObjectIndex(MyState state) throws IOException { GetFlatObjectIndex(state, "demo-flat-object-test", "host"); DeleteIndex(state, "demo-flat-object-test"); } @@ -112,10 +110,10 @@ public void CreateFlatObjectIndex(MyState state) throws IOException, URISyntaxEx @Benchmark @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void indexDynamicMapping(MyState state) throws IOException, URISyntaxException { + public void indexDynamicMapping(MyState state) throws IOException { GetDynamicIndex(state, "demo-dynamic-test1"); String doc = - "{ \"message\": 
\"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; + "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; UploadDoc(state, "demo-dynamic-test1", doc); DeleteIndex(state, "demo-dynamic-test1"); } @@ -130,7 +128,7 @@ public void indexDynamicMapping(MyState state) throws IOException, URISyntaxExce public void indexFlatObjectMapping(MyState state) throws IOException, URISyntaxException { GetFlatObjectIndex(state, "demo-flat-object-test1", "host"); String doc = - "{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; + "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; UploadDoc(state, "demo-flat-object-test1", doc); DeleteIndex(state, "demo-flat-object-test1"); } @@ -146,7 +144,7 @@ public void searchDynamicMapping(MyState state) throws IOException { String indexName = 
"demo-dynamic-test2"; GetDynamicIndex(state, indexName); String doc = - "{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; + "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; UploadDoc(state, indexName, doc); SearchDoc(state, indexName, "host.hostname", "bionic", "@timestamp", "message"); DeleteIndex(state, indexName); @@ -162,9 +160,9 @@ public void searchDynamicMapping(MyState state) throws IOException { public void searchFlatObjectMapping(MyState state) throws IOException { GetFlatObjectIndex(state, "demo-flat-object-test2", "host"); String doc = - "{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; + "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; UploadDoc(state, "demo-flat-object-test2", doc); - SearchDoc(state, "demo-flat-object-test2", 
"host", "name", "@timestamp", "message"); + SearchDoc(state, "demo-flat-object-test2", "host.hostname", "name", "@timestamp", "message"); DeleteIndex(state, "demo-flat-object-test2"); } @@ -174,27 +172,23 @@ public void searchFlatObjectMapping(MyState state) throws IOException { * search for document and delete index * Caught exceptions with the number of fields over 1000 */ - // @Benchmark - // @BenchmarkMode(Mode.AverageTime) - // @OutputTimeUnit(TimeUnit.MILLISECONDS) - // public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException { - // - // String indexName = "demo-dynamic-test3"; - // GetDynamicIndex(state, indexName); - // String doc = GenerateRandomJson(); - // Map searchValueAndPath = findNestedValueAndPath(doc,99, "field0"); - // String searchValue = searchValueAndPath.get("value"); - // String searchFieldName = searchValueAndPath.get("path"); - // UploadDoc(state, indexName, doc); - // SearchDoc(state,indexName,searchFieldName,searchValue,searchValue ,searchFieldName ); - // DeleteIndex(state, indexName); - // } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException { + String indexName = "demo-dynamic-test3"; + GetDynamicIndex(state, indexName); + String doc = GenerateRandomJson(10, "nested"); + Map searchValueAndPath = findNestedValueAndPath(doc, 26, ""); + String searchValue = searchValueAndPath.get("value"); + String searchFieldName = searchValueAndPath.get("path"); + UploadDoc(state, indexName, doc); + SearchDoc(state, indexName, searchFieldName, searchValue, searchFieldName, searchFieldName); + DeleteIndex(state, indexName); + } /** - * FlatObjectIndex: - * create index, upload a nested document in 100 levels, and each level with 10 fields, - * search for document and delete index - * works fine and able to return document + * debug search in dotpath */ @Benchmark 
@BenchmarkMode(Mode.AverageTime) @@ -202,10 +196,10 @@ public void searchFlatObjectMapping(MyState state) throws IOException { public void searchFlatObjectMappingInValueWithOneHundredNestedJSON(MyState state) throws IOException { String indexName = "demo-flat-object-test4"; GetFlatObjectIndex(state, indexName, "nested0"); - String doc = GenerateRandomJson(100, "nested"); - Map searchValueAndPath = findNestedValueAndPath(doc, 6, "nested0"); + String doc = GenerateRandomJson(10, "nested"); + Map searchValueAndPath = findNestedValueAndPath(doc, 26, ""); String SearchRandomWord = searchValueAndPath.get("value"); - String SearchRandomPath = "nested0._value"; + String SearchRandomPath = searchValueAndPath.get("path"); String searchFieldName = "nested0"; UploadDoc(state, indexName, doc); SearchDoc(state, indexName, SearchRandomPath, SearchRandomWord, searchFieldName, searchFieldName); @@ -277,20 +271,15 @@ private static void SearchDoc( sourceBuilder.query(QueryBuilders.matchQuery(searchFieldName, searchText)); sourceBuilder.from(0); sourceBuilder.size(10); - sourceBuilder.sort(sortFieldName, SortOrder.DESC); - sourceBuilder.highlighter(new HighlightBuilder().field(highlightFieldName)); SearchRequest searchRequest = new SearchRequest(indexName); searchRequest.source(sourceBuilder); SearchResponse SearchResponse = state.client.search(searchRequest, RequestOptions.DEFAULT); if (!SearchResponse.status().toString().equals("OK")) { - System.out.println("the number of hit is: " + SearchResponse.getHits().getTotalHits().value); - System.out.println("SearchResponse: " + SearchResponse.toString()); - } - - SearchHits hits = SearchResponse.getHits(); - long totalHits = hits.getTotalHits().value; - if (totalHits == 0) { - throw new IOException("No hit is found"); + SearchHits hits = SearchResponse.getHits(); + long totalHits = hits.getTotalHits().value; + if (totalHits == 0) { + throw new IOException("No hit is found"); + } } } @@ -298,14 +287,14 @@ private static String 
GenerateRandomJson(int numberOfNestedLevel, String subObje JSONObject json = new JSONObject(); Random random = new Random(); - // Create 100 nested levels + // Create nested levels for (int i = 0; i < numberOfNestedLevel; i++) { JSONObject nestedObject = new JSONObject(); // Add 10 fields to each nested level for (int j = 0; j < 10; j++) { - String field = "field" + j; + String field = "field" + i + j; String value = generateRandomString(random); nestedObject.put(field, value); } @@ -337,31 +326,37 @@ private static Map findNestedValueAndPath(String randomJsonStrin String targetKey = "field" + levelNumber; Map result = new HashMap<>(); Iterator keys = jsonObject.keys(); - StringBuilder path = new StringBuilder(); while (keys.hasNext()) { String key = keys.next(); - if (path.length() == 0) { - path.append(currentPath); - } - Object value = jsonObject.get(key); if (key.equals(targetKey)) { result.put("value", value.toString()); - result.put("path", key); - System.out.println("value is " + value.toString()); - System.out.println("path is " + path.toString()); - break; + if (currentPath.length() == 0) { + currentPath = key; + } + result.put("path", currentPath + "." + key); + return result; } - if (value instanceof JSONObject) { - path.append("." + key); + if (value instanceof JSONObject) { + if (currentPath.length() == 0) { + currentPath = key; + } else { + if (currentPath.contains(".") && currentPath.split("\\.").length > 1) { + int pathLength = currentPath.split("\\.").length; + currentPath = "nested0." + key; + } else { + currentPath = currentPath + "." 
+ key; + } - Map nestedResult = findNestedValueAndPath(value.toString(), levelNumber, path.toString()); + } + Map nestedResult = findNestedValueAndPath(value.toString(), levelNumber, currentPath); if (!nestedResult.isEmpty()) { return nestedResult; } } } + return result; } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 755bf509ab9f5..a4b78c0ee575a 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -38,11 +38,10 @@ import java.io.IOException; import java.nio.CharBuffer; import java.util.ArrayList; -import java.util.logging.Logger; /** * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser - * returns XContentParser with 3 string fields + * returns XContentParser with one parent field and subfields * fieldName, fieldName._value, fieldName._valueAndPath * @opensearch.internal */ @@ -60,12 +59,11 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private NamedXContentRegistry xContentRegistry; private DeprecationHandler deprecationHandler; - /** - * logging function - * To removed after draft PR - */ - private static final Logger logger = Logger.getLogger((JsonToStringXContentParser.class.getName())); + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; + private static final String VALUE_SUFFIX = "._value"; + private static final String DOT_SYMBOL = "."; + private static final String EQUAL_SYMBOL = "="; public JsonToStringXContentParser( NamedXContentRegistry xContentRegistry, @@ -85,11 +83,10 @@ public XContentParser parseObject() throws IOException { builder.startObject(); parseToken(); builder.field(this.fieldTypeName, keyList); - builder.field(this.fieldTypeName + "._value", valueList); - 
builder.field(this.fieldTypeName + "._valueAndPath", valueAndPathList); + builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); + builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); builder.endObject(); String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON); - logger.info("Before createParser, jString: " + jString + "\n"); return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } @@ -98,60 +95,38 @@ private void parseToken() throws IOException { while (this.parser.nextToken() != Token.END_OBJECT) { currentFieldName = this.parser.currentName(); - - logger.info("currentFieldName: " + currentFieldName + "\n"); StringBuilder parsedFields = new StringBuilder(); StringBuilder path = new StringBuilder(fieldTypeName); if (this.parser.nextToken() == Token.START_OBJECT) { - /** - * for nested Json, make a copy of parser, then parse the entire Json as string. - * for example: - * {"grandpa": { - * "dad": { - * "son": "me" - * } } - * the JSON object would be read as three string fields for "grandpa" would be - * grandpa: {"dad","son"} -- the parent string field contains the keys only. - * grandpa._value: { "{dad: {son: me}}} ,"{son: me}","me"} -- the _value sub string field contains the values only. - * grandpa._pathAndValue: { "grandpa={"dad: {son: me}}}","grandpa.dad={son: me}}", "grandpa.dad.son=me"} - * -- the _pathAndValue sub string field contains the "path=Value" format. - */ // TODO: to convert the entire JsonObject as string without changing the tokenizer position. - path.append("." 
+ currentFieldName); + path.append(DOT_SYMBOL + currentFieldName); parsedFields.append(this.parser.toString()); this.keyList.add(currentFieldName); this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + "=" + parsedFields.toString()); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString()); parseToken(); } else { - path.append("." + currentFieldName); + path.append(DOT_SYMBOL + currentFieldName); parseValue(currentFieldName, parsedFields); this.keyList.add(currentFieldName); this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + "=" + parsedFields.toString()); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString()); } } } private void parseValue(String currentFieldName, StringBuilder parsedFields) throws IOException { - logger.info("this.parser.currentToken(): " + this.parser.currentToken() + "\n"); switch (this.parser.currentToken()) { case VALUE_STRING: parsedFields.append(this.parser.textOrNull()); - logger.info("currentFieldName and parsedFields :" + currentFieldName + " " + parsedFields.toString() + "\n"); break; // Handle other token types as needed - // ToDo, what do we do, if encountered these fields? 
- // should never get to START_OBJECT case START_OBJECT: throw new IOException("Unsupported token type"); case FIELD_NAME: - // should never get to FIELD_NAME - logger.info("token is FIELD_NAME: " + this.parser.currentName() + "\n"); break; case VALUE_EMBEDDED_OBJECT: - logger.info("token is VALUE_EMBEDDED_OBJECT: " + this.parser.objectText() + "\n"); break; default: throw new IOException("Unsupported token type [" + parser.currentToken() + "]"); diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 3636567e815b2..76784a17c16e2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -14,12 +14,20 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; +import org.opensearch.OpenSearchException; import org.opensearch.common.Nullable; import org.opensearch.common.collect.Iterators; import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.xcontent.DeprecationHandler; import org.opensearch.common.xcontent.NamedXContentRegistry; import org.opensearch.common.xcontent.XContentParser; @@ -29,34 +37,27 @@ import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; import 
org.opensearch.index.query.QueryShardContext; -import org.opensearch.index.similarity.SimilarityProvider; +import org.opensearch.index.query.QueryShardException; import org.opensearch.search.aggregations.support.CoreValuesSourceType; import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Objects; +import java.util.function.BiFunction; import java.util.function.Supplier; -import java.util.logging.Logger; +import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; /** * A field mapper for flat-objects. This mapper accepts JSON object and treat as string fields in one index. * @opensearch.internal */ -public final class FlatObjectFieldMapper extends ParametrizedFieldMapper { - /** - * logging function: - * To remove after draft PR - */ - - private static final Logger logger = Logger.getLogger((FlatObjectFieldMapper.class.getName())); +public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { /** * A flat-object mapping contains one parent field itself and two substring fields, @@ -66,10 +67,11 @@ public final class FlatObjectFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "flat-object"; private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; private static final String VALUE_SUFFIX = "._value"; + private static final String DOT_SYMBOL = "."; + private static final String EQUAL_SYMBOL = "="; /** - * Default parameters, similar to keyword - * In flat-object, three fields are treated as keyword fields with the same parameters + * In flat-object field mapper, field type is similar to keyword field type * Cannot be tokenized, can OmitNorms, and can setIndexOption. 
* @opensearch.internal */ @@ -82,12 +84,16 @@ public static class Defaults { FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); FIELD_TYPE.freeze(); } + + } + + @Override + public MappedFieldType keyedFieldType(String key) { + return new FlatObjectFieldType(this.name() + DOT_SYMBOL + key); } /** - * The flat-object field for the field mapper - * - * @opensearch.internal + * FlatObjectFieldType is the parent field type. */ public static class FlatObjectField extends Field { @@ -102,58 +108,21 @@ private static FlatObjectFieldMapper toType(FieldMapper in) { } /** - * The builder for the flat-object field mapper - * Set the same parameters from keywordFieldMapper.Builder + * The builder for the flat-object field mapper using default parameters as + * indexed: flat-object field mapper is default to be indexed. + * hasDocValues: to store index and support efficient access to individual field values. + * stored: the original value of the field is not stored in the index. + * nullValue: not accept null value + * ignoreAbove: exclude values that exceed the maximum length from the indexing process. 
* @opensearch.internal */ - public static class Builder extends ParametrizedFieldMapper.Builder { - - private final Parameter indexed = Parameter.indexParam(m -> toType(m).indexed, true); - - private final Parameter hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true); - private final Parameter stored = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); - - private final Parameter nullValue = Parameter.stringParam("null_value", false, m -> toType(m).nullValue, null) - .acceptsNull(); - - private final Parameter eagerGlobalOrdinals = Parameter.boolParam( - "eager_global_ordinals", - true, - m -> toType(m).eagerGlobalOrdinals, - false - ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> toType(m).ignoreAbove, - Integer.MAX_VALUE - ); - - private final Parameter indexOptions = Parameter.restrictedStringParam( - "index_options", - false, - m -> toType(m).indexOptions, - "docs", - "freqs" - ); - private final Parameter hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false); - private final Parameter similarity = TextParams.similarity(m -> toType(m).similarity); - - private final Parameter normalizer = Parameter.stringParam("normalizer", false, m -> toType(m).normalizerName, "default"); - - private final Parameter splitQueriesOnWhitespace = Parameter.boolParam( - "split_queries_on_whitespace", - true, - m -> toType(m).splitQueriesOnWhitespace, - false - ); - - private final Parameter> meta = Parameter.metaParam(); - private final Parameter boost = Parameter.boostParam(); + public static class Builder extends FieldMapper.Builder { + private final IndexAnalyzers indexAnalyzers; - public Builder(String name, IndexAnalyzers indexAnalyzers) { - super(name); + Builder(String name, IndexAnalyzers indexAnalyzers) { + super(name, Defaults.FIELD_TYPE); + builder = this; this.indexAnalyzers = indexAnalyzers; } @@ -161,131 +130,50 @@ public Builder(String name) { this(name, 
null); } - public Builder ignoreAbove(int ignoreAbove) { - this.ignoreAbove.setValue(ignoreAbove); - return this; - } - - Builder normalizer(String normalizerName) { - this.normalizer.setValue(normalizerName); - return this; - } - - Builder nullValue(String nullValue) { - this.nullValue.setValue(nullValue); - return this; - } - - public Builder docValues(boolean hasDocValues) { - this.hasDocValues.setValue(hasDocValues); - return this; - } - - public Builder index(boolean index) { - return this; - } - - public Builder store(boolean store) { - this.stored.setValue(store); - return this; - } - - @Override - protected List> getParameters() { - return Arrays.asList( - indexed, - hasDocValues, - stored, - nullValue, - eagerGlobalOrdinals, - ignoreAbove, - indexOptions, - hasNorms, - similarity, - normalizer, - splitQueriesOnWhitespace, - boost, - meta - ); - } - - /** - * FlatObjectFieldType is the parent field type. the parent field enables KEYWORD_ANALYZER, - * allows normalizer and splitQueriesOnWhitespace - */ private FlatObjectFieldType buildFlatObjectFieldType(BuilderContext context, FieldType fieldType) { NamedAnalyzer normalizer = Lucene.KEYWORD_ANALYZER; - NamedAnalyzer searchAnalyzer = Lucene.KEYWORD_ANALYZER; - String normalizerName = this.normalizer.getValue(); - if (Objects.equals(normalizerName, "default") == false) { - assert indexAnalyzers != null; - normalizer = indexAnalyzers.getNormalizer(normalizerName); - if (normalizer == null) { - throw new MapperParsingException("normalizer [" + normalizerName + "] not found for field [" + name + "]"); - } - if (splitQueriesOnWhitespace.getValue()) { - searchAnalyzer = indexAnalyzers.getWhitespaceNormalizer(normalizerName); - } else { - searchAnalyzer = normalizer; - } - } else if (splitQueriesOnWhitespace.getValue()) { - searchAnalyzer = Lucene.WHITESPACE_ANALYZER; - } - return new FlatObjectFieldType(buildFullName(context), fieldType, normalizer, searchAnalyzer, this); + return new 
FlatObjectFieldType(buildFullName(context), fieldType); } /** - * ValueFieldMapper is the sub field type for values in the Json. + * ValueFieldMapper is the subfield type for values in the Json. * use a keywordfieldtype */ private ValueFieldMapper buildValueFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { String fullName = buildFullName(context); FieldType vft = new FieldType(fieldType); - vft.setOmitNorms(this.hasNorms.getValue() == false); - KeywordFieldMapper.KeywordFieldType valueFieldType = new KeywordFieldMapper.KeywordFieldType(fullName + "._value", vft); - // TODO: revisit analyzer object + KeywordFieldMapper.KeywordFieldType valueFieldType = new KeywordFieldMapper.KeywordFieldType(fullName + VALUE_SUFFIX, vft); fft.setValueFieldType(valueFieldType); return new ValueFieldMapper(vft, valueFieldType); - } /** - * ValueAndPathFieldMapper is the sub field type for path=value format in the Json. + * ValueAndPathFieldMapper is the subfield type for path=value format in the Json. 
* also use a keywordfieldtype */ private ValueAndPathFieldMapper buildValueAndPathFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { - String fullName = buildFullName(context); FieldType vft = new FieldType(fieldType); - vft.setOmitNorms(this.hasNorms.getValue() == false); KeywordFieldMapper.KeywordFieldType ValueAndPathFieldType = new KeywordFieldMapper.KeywordFieldType( - fullName + "._valueAndPath", + fullName + VALUE_AND_PATH_SUFFIX, vft ); - // TODO: revisit analyzer object fft.setValueAndPathFieldType(ValueAndPathFieldType); return new ValueAndPathFieldMapper(vft, ValueAndPathFieldType); } - /** - * FlatObjectFieldMapper builds the FLatObjectFieldMapper itself, and also build the two sub fieldMappers: - * ValueFieldMapper and ValueAndPathFieldMapper - */ @Override public FlatObjectFieldMapper build(BuilderContext context) { FieldType fieldtype = new FieldType(Defaults.FIELD_TYPE); - fieldtype.setOmitNorms(this.hasNorms.getValue() == false); - fieldtype.setIndexOptions(TextParams.toIndexOptions(this.indexed.getValue(), this.indexOptions.getValue())); - fieldtype.setStored(this.stored.getValue()); FlatObjectFieldType fft = buildFlatObjectFieldType(context, fieldtype); return new FlatObjectFieldMapper( name, - fieldtype, + Defaults.FIELD_TYPE, fft, buildValueFieldMapper(context, fieldtype, fft), buildValueAndPathFieldMapper(context, fieldtype, fft), - multiFieldsBuilder.build(this, context), - copyTo.build(), + MultiFields.empty(), + CopyTo.empty(), this ); } @@ -294,8 +182,24 @@ public FlatObjectFieldMapper build(BuilderContext context) { public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers())); /** - * Field type for flat-object fields - * one flat-object fields contains its own fieldType, one valueFieldType and one valueAndPathFieldType + * Creates a new TypeParser for flatObjectFieldMapper that does not use ParameterizedFieldMapper + */ + public static class TypeParser implements 
Mapper.TypeParser { + private final BiFunction builderFunction; + + public TypeParser(BiFunction builderFunction) { + this.builderFunction = builderFunction; + } + + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + Builder builder = builderFunction.apply(name, parserContext); + return builder; + } + } + + /** + * flat-object fields type contains its own fieldType, one valueFieldType and one valueAndPathFieldType * @opensearch.internal */ public static final class FlatObjectFieldType extends StringFieldType { @@ -307,30 +211,8 @@ public static final class FlatObjectFieldType extends StringFieldType { private KeywordFieldMapper.KeywordFieldType valueAndPathFieldType; - public FlatObjectFieldType( - String name, - FieldType fieldType, - NamedAnalyzer normalizer, - NamedAnalyzer searchAnalyzer, - Builder builder - ) { - super( - name, - fieldType.indexOptions() != IndexOptions.NONE, - fieldType.stored(), - builder.hasDocValues.getValue(), - new TextSearchInfo(fieldType, builder.similarity.getValue(), searchAnalyzer, searchAnalyzer), - builder.meta.getValue() - ); - setEagerGlobalOrdinals(builder.eagerGlobalOrdinals.getValue()); - setIndexAnalyzer(normalizer); - setBoost(builder.boost.getValue()); - this.ignoreAbove = builder.ignoreAbove.getValue(); - this.nullValue = builder.nullValue.getValue(); - } - public FlatObjectFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { - super(name, isSearchable, false, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + super(name, isSearchable, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; @@ -345,7 +227,7 @@ public FlatObjectFieldType(String name, FieldType fieldType) { name, fieldType.indexOptions() != IndexOptions.NONE, false, - false, + true, new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, 
Lucene.KEYWORD_ANALYZER), Collections.emptyMap() ); @@ -384,6 +266,13 @@ NamedAnalyzer normalizer() { return indexAnalyzer(); } + /** + * + * Fielddata is an in-memory data structure that is used for aggregations, sorting, and scripting. + * @param fullyQualifiedIndexName the name of the index this field-data is build for + * @param searchLookup a {@link SearchLookup} supplier to allow for accessing other fields values in the context of runtime fields + * @return IndexFieldData.Builder + */ @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { failIfNoDocValues(); @@ -449,6 +338,134 @@ protected BytesRef indexedValueForSearch(Object value) { return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); } + /** + * redirect term query with rewrite value to rewriteSearchValue and directSubFieldName + */ + @Override + public Query termQuery(Object value, @Nullable QueryShardContext context) { + + String searchValueString = ((BytesRef) value).utf8ToString(); + String directSubFieldName = directSubfield(); + String rewriteSearchValue = rewriteValue(searchValueString); + + failIfNotIndexed(); + Query query; + query = new TermQuery(new Term(directSubFieldName, indexedValueForSearch(rewriteSearchValue))); + if (boost() != 1f) { + query = new BoostQuery(query, boost()); + } + return query; + } + + @Override + public Query termsQuery(List values, QueryShardContext context) { + failIfNotIndexed(); + String directedSearchFieldName = directSubfield(); + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + String rewriteValues = rewriteValue(((BytesRef) values.get(i)).utf8ToString()); + + bytesRefs[i] = indexedValueForSearch(new BytesRef(rewriteValues)); + + } + + return new TermInSetQuery(directedSearchFieldName, bytesRefs); + } + + /** + * To direch search fields, if a dot path was used in search query, + * then direct to 
flatObjectFieldName._valueAndPath subfield, + * else, direct to flatObjectFieldName._value subfield. + * @return directedSubFieldName + */ + public String directSubfield() { + if (name().contains(DOT_SYMBOL)) { + String[] dotPathList = name().split("\\."); + return dotPathList[0] + VALUE_AND_PATH_SUFFIX; + } else { + return this.valueFieldType.name(); + } + } + + /** + * If the search key is assigned with value, + * the dot path was used in search query, then + * rewrite the searchValueString as the format "dotpath=value", + * @return rewriteSearchValue + */ + public String rewriteValue(String searchValueString) { + if (!name().contains(DOT_SYMBOL)) { + return searchValueString; + } else { + String rewriteSearchValue = new StringBuilder().append(name()).append(EQUAL_SYMBOL).append(searchValueString).toString(); + return rewriteSearchValue; + } + + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) { + String directSubfield = directSubfield(); + String rewriteValue = rewriteValue(value); + + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[prefix] queries cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false. For optimised prefix queries on text " + + "fields please enable [index_prefixes]." 
+ ); + } + failIfNotIndexed(); + if (method == null) { + method = MultiTermQuery.CONSTANT_SCORE_REWRITE; + } + if (caseInsensitive) { + return AutomatonQueries.caseInsensitivePrefixQuery((new Term(directSubfield, indexedValueForSearch(rewriteValue))), method); + } + return new PrefixQuery(new Term(directSubfield, indexedValueForSearch(rewriteValue)), method); + } + + @Override + public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { + String directSubfield = directSubfield(); + String rewriteUpperTerm = rewriteValue(((BytesRef) upperTerm).utf8ToString()); + String rewriteLowerTerm = rewriteValue(((BytesRef) lowerTerm).utf8ToString()); + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[range] queries on [text] or [keyword] fields cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false." + ); + } + failIfNotIndexed(); + return new TermRangeQuery( + directSubfield, + lowerTerm == null ? null : indexedValueForSearch(rewriteLowerTerm), + upperTerm == null ? null : indexedValueForSearch(rewriteUpperTerm), + includeLower, + includeUpper + ); + } + + /** + * if there is dot path. query the field name in flatObject parent field. 
+ * else query in _field_names system field + */ + @Override + public Query existsQuery(QueryShardContext context) { + String searchKey; + String searchField; + if (name().contains(DOT_SYMBOL)) { + searchKey = name().split("\\.")[0]; + searchField = name(); + } else { + searchKey = FieldNamesFieldMapper.NAME; + searchField = name(); + } + return new TermQuery(new Term(searchKey, indexedValueForSearch(searchField))); + } + @Override public Query wildcardQuery( String value, @@ -458,27 +475,23 @@ public Query wildcardQuery( ) { // flat-object field types are always normalized, so ignore case sensitivity and force normalize the wildcard // query text - return super.wildcardQuery(value, method, caseInsensitve, true, context); + throw new QueryShardException( + context, + "Can only use wildcard queries on keyword and text fields - not on [" + + name() + + "] which is of type [" + + "flat-object" + + "]" + ); } } - private final boolean indexed; - private final boolean hasDocValues; - private final String nullValue; - private final boolean eagerGlobalOrdinals; - private final int ignoreAbove; - private final String indexOptions; - private final FieldType fieldType; - private final SimilarityProvider similarity; - private final String normalizerName; - private final boolean splitQueriesOnWhitespace; private final ValueFieldMapper valueFieldMapper; private final ValueAndPathFieldMapper valueAndPathFieldMapper; - private final IndexAnalyzers indexAnalyzers; - protected FlatObjectFieldMapper( + FlatObjectFieldMapper( String simpleName, FieldType fieldType, FlatObjectFieldType mappedFieldType, @@ -488,29 +501,13 @@ protected FlatObjectFieldMapper( CopyTo copyTo, Builder builder ) { - super(simpleName, mappedFieldType, multiFields, copyTo); + super(simpleName, fieldType, mappedFieldType, copyTo); assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; - this.indexed = builder.indexed.getValue(); - this.hasDocValues = builder.hasDocValues.getValue(); - 
this.nullValue = builder.nullValue.getValue(); - this.eagerGlobalOrdinals = builder.eagerGlobalOrdinals.getValue(); - this.ignoreAbove = builder.ignoreAbove.getValue(); - this.indexOptions = builder.indexOptions.getValue(); this.fieldType = fieldType; - this.similarity = builder.similarity.getValue(); - this.normalizerName = builder.normalizer.getValue(); - this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue(); this.indexAnalyzers = builder.indexAnalyzers; this.valueFieldMapper = valueFieldMapper; this.valueAndPathFieldMapper = valueAndPathFieldMapper; - } - - /** - * TODO: Placeholder, this is used at keywordfieldmapper, considering to remove ignoreAbove - * Values that have more chars than the return value of this method will - * be skipped at parsing time. */ - public int ignoreAbove() { - return ignoreAbove; + this.mappedFieldType = mappedFieldType; } @Override @@ -518,6 +515,11 @@ protected FlatObjectFieldMapper clone() { return (FlatObjectFieldMapper) super.clone(); } + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + @Override public FlatObjectFieldType fieldType() { return (FlatObjectFieldType) super.fieldType(); @@ -530,7 +532,7 @@ protected void parseCreateField(ParseContext context) throws IOException { if (context.externalValueSet()) { value = context.externalValue().toString(); - ParseValueAddFields(context, value, fieldType().name()); + parseValueAddFields(context, value, fieldType().name()); } else { JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( NamedXContentRegistry.EMPTY, @@ -548,12 +550,10 @@ protected void parseCreateField(ParseContext context) throws IOException { switch (currentToken) { case FIELD_NAME: fieldName = parser.currentName(); - logger.info("fieldName: " + fieldName); break; case VALUE_STRING: value = parser.textOrNull(); - logger.info("value: " + value); - ParseValueAddFields(context, value, fieldName); + parseValueAddFields(context, value, 
fieldName); break; } @@ -580,10 +580,7 @@ public Iterator iterator() { return concat; } - private void ParseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { - if (value == null || value.length() > ignoreAbove) { - return; - } + private void parseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { NamedAnalyzer normalizer = fieldType().normalizer(); if (normalizer != null) { @@ -592,18 +589,16 @@ private void ParseValueAddFields(ParseContext context, String value, String fiel String[] valueTypeList = fieldName.split("\\._"); String valueType = "._" + valueTypeList[valueTypeList.length - 1]; - logger.info("valueType: " + valueType); + /** * the JsonToStringXContentParser returns XContentParser with 3 string fields * fieldName, fieldName._value, fieldName._valueAndPath */ if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - logger.info("FlatObjectField name is " + fieldType().name()); - logger.info("FlatObjectField value is " + value); // convert to utf8 only once before feeding postings/dv/stored fields - final BytesRef binaryValue = new BytesRef(value); + final BytesRef binaryValue = new BytesRef(fieldType().name() + DOT_SYMBOL + value); Field field = new FlatObjectField(fieldType().name(), binaryValue, fieldType); if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { @@ -621,18 +616,15 @@ private void ParseValueAddFields(ParseContext context, String value, String fiel } if (valueType.equals(VALUE_SUFFIX)) { if (valueFieldMapper != null) { - logger.info("valueFieldMapper value is " + value); valueFieldMapper.addField(context, value); } } if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { if (valueAndPathFieldMapper != null) { - logger.info("valueAndPathFieldMapper value is " + value); valueAndPathFieldMapper.addField(context, value); } } - // TODo: to revisit if flat-object needs docValues. 
if (fieldType().hasDocValues()) { if (context.doc().getField(fieldType().name()) == null || !context.doc().getFields(fieldType().name()).equals(field)) { context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); @@ -679,12 +671,6 @@ protected String contentType() { return CONTENT_TYPE; } - @Override - public ParametrizedFieldMapper.Builder getMergeBuilder() { - return new Builder(simpleName(), indexAnalyzers).init(this); - } - - // TODO Further simplify the code by new KeyWordFieldMapper to be ValueAndPathFieldMapper and ValueFieldMapper private static final class ValueAndPathFieldMapper extends FieldMapper { protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { @@ -692,11 +678,9 @@ protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldMapper.Keywor } void addField(ParseContext context, String value) { - // context.doc().add(new Field(fieldType().name(), value, fieldType)); final BytesRef binaryValue = new BytesRef(value); if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { Field field = new KeywordFieldMapper.KeywordField(fieldType().name(), binaryValue, fieldType); - // Field field = new (fieldType().name()+VALUE_AND_PATH_SUFFIX, binaryValue, fieldType); context.doc().add(field); @@ -725,6 +709,7 @@ protected String contentType() { public String toString() { return fieldType().toString(); } + } private static final class ValueFieldMapper extends FieldMapper { diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 5310e1b1e8397..696536cb85c9d 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -51,6 +51,7 @@ import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.FieldAliasMapper; import 
org.opensearch.index.mapper.FieldNamesFieldMapper; +import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.IgnoredFieldMapper; @@ -162,6 +163,7 @@ public static Map getMappers(List mappe mappers.put(CompletionFieldMapper.CONTENT_TYPE, CompletionFieldMapper.PARSER); mappers.put(FieldAliasMapper.CONTENT_TYPE, new FieldAliasMapper.TypeParser()); mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); + mappers.put(FlatObjectFieldMapper.CONTENT_TYPE, FlatObjectFieldMapper.PARSER); for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java new file mode 100644 index 0000000000000..c1a506481fd55 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.Strings; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.json.JsonXContent; + +import java.io.IOException; + +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.StringContains.containsString; + +public class FlatObjectFieldMapperTests extends MapperServiceTestCase { + private static final String FIELD_TYPE = "flat-object"; + + // @Override + public FlatObjectFieldMapper.Builder newBuilder() { + return new FlatObjectFieldMapper.Builder("flat-object"); + } + + public final void testExistsQueryDocValuesDisabledWithNorms() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); })); + assertParseMinimalWarnings(); + } + + public void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", FIELD_TYPE); + } + + /** + * Writes a sample value for the field to the provided {@link XContentBuilder}. 
+ * @param builder builder + */ + + protected void writeFieldValue(XContentBuilder builder) throws IOException { + builder.startObject(); + builder.field("foo", "bar"); + builder.endObject(); + } + + public final void testEmptyName() { + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject(""); + minimalMapping(b); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("name cannot be empty string")); + assertParseMinimalWarnings(); + } + + public void testMinimalToMaximal() throws IOException { + XContentBuilder orig = JsonXContent.contentBuilder().startObject(); + createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS); + orig.endObject(); + XContentBuilder parsedFromOrig = JsonXContent.contentBuilder().startObject(); + createMapperService(orig).documentMapper().mapping().toXContent(parsedFromOrig, ToXContent.EMPTY_PARAMS); + parsedFromOrig.endObject(); + assertEquals(Strings.toString(orig), Strings.toString(parsedFromOrig)); + assertParseMaximalWarnings(); + } + + public void testDefaults() throws Exception { + XContentBuilder mapping = fieldMapping(this::minimalMapping); + DocumentMapper mapper = createDocumentMapper(mapping); + assertEquals(Strings.toString(mapping), mapper.mappingSource().toString()); + + String json = Strings.toString( + XContentFactory.jsonBuilder().startObject().startObject("field").field("foo", "bar").endObject().endObject() + ); + + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.foo"), fields[0].binaryValue()); + + IndexableFieldType fieldType = fields[0].fieldType(); + assertFalse(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertEquals(DocValuesType.NONE, 
fieldType.docValuesType()); + + } + + public void testNullValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.nullField("field")))); + assertThat(e.getMessage(), containsString("object mapping for [_doc] tried to parse field [field] as object")); + + } + + protected void assertParseMinimalWarnings() { + // Most mappers don't emit any warnings + } + + protected void assertParseMaximalWarnings() { + // Most mappers don't emit any warnings + } + +} From 86b22d91fb2d553e9119d75202647ebb28e97dc6 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Wed, 22 Mar 2023 00:23:05 -0700 Subject: [PATCH 04/25] Rename flat-object to flat_object and fix CI tests Signed-off-by: Mingshi Liu --- CHANGELOG.md | 2 +- .../mapper/FlatObjectMappingBenchmark.java | 2 +- .../index/mapper/FlatObjectFieldMapper.java | 55 ++++++++----------- .../mapper/FlatObjectFieldMapperTests.java | 4 +- .../SignificantTermsAggregatorTests.java | 4 +- .../terms/SignificantTextAggregatorTests.java | 4 +- 6 files changed, 33 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0942cd93d4a6..92b4a1ddbb791 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,7 +77,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Require MediaType in Strings.toString API ([#6009](https://github.com/opensearch-project/OpenSearch/pull/6009)) - [Refactor] XContent base classes from xcontent to core library ([#5902](https://github.com/opensearch-project/OpenSearch/pull/5902)) -- Introduce a new field type: flat-object (TODO: update the link!) 
([#1018](https://github.com/opensearch-project/OpenSearch/issues/1018)) +- Introduce a new field type: flat_object ([#6507](https://github.com/opensearch-project/OpenSearch/pull/6507)) ### Deprecated diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java index 32250edf0b0c6..46e779a6f960f 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java @@ -220,7 +220,7 @@ private static void GetFlatObjectIndex(MyState state, String indexName, String f .startObject() .startObject("properties") .startObject(flatObjectFieldName) - .field("type", "flat-object") + .field("type", "flat_object") .endObject() .endObject() .endObject() diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 76784a17c16e2..db02dd486f2ed 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -54,24 +54,21 @@ import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; /** - * A field mapper for flat-objects. This mapper accepts JSON object and treat as string fields in one index. + * A field mapper for flat_objects. This mapper accepts JSON object and treat as string fields in one index. 
+ * A flat_object field contains one parent field itself and two substring fields: + * field._valueAndPath and field._value * @opensearch.internal */ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { - /** - * A flat-object mapping contains one parent field itself and two substring fields, - * field._valueAndPath and field._value - */ - - public static final String CONTENT_TYPE = "flat-object"; + public static final String CONTENT_TYPE = "flat_object"; private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; private static final String VALUE_SUFFIX = "._value"; private static final String DOT_SYMBOL = "."; private static final String EQUAL_SYMBOL = "="; /** - * In flat-object field mapper, field type is similar to keyword field type + * In flat_object field mapper, field type is similar to keyword field type * Cannot be tokenized, can OmitNorms, and can setIndexOption. * @opensearch.internal */ @@ -108,8 +105,8 @@ private static FlatObjectFieldMapper toType(FieldMapper in) { } /** - * The builder for the flat-object field mapper using default parameters as - * indexed: flat-object field mapper is default to be indexed. + * The builder for the flat_object field mapper using default parameters as + * indexed: flat_object field mapper is default to be indexed. * hasDocValues: to store index and support efficient access to individual field values. * stored: the original value of the field is not stored in the index. 
* nullValue: not accept null value @@ -199,7 +196,7 @@ public Mapper.Builder parse(String name, Map node, ParserCont } /** - * flat-object fields type contains its own fieldType, one valueFieldType and one valueAndPathFieldType + * flat_object fields type contains its own fieldType, one valueFieldType and one valueAndPathFieldType * @opensearch.internal */ public static final class FlatObjectFieldType extends StringFieldType { @@ -313,7 +310,7 @@ public Object valueForDisplay(Object value) { if (value == null) { return null; } - // flat-objects are internally stored as utf8 bytes + // flat_objects are internally stored as utf8 bytes BytesRef binaryValue = (BytesRef) value; return binaryValue.utf8ToString(); } @@ -321,7 +318,7 @@ public Object valueForDisplay(Object value) { @Override protected BytesRef indexedValueForSearch(Object value) { if (getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) { - // flat-object analyzer with the default attribute source which encodes terms using UTF8 + // flat_object analyzer with the default attribute source which encodes terms using UTF8 // in that case we skip normalization, which may be slow if there many terms need to // parse (eg. 
large terms query) since Analyzer.normalize involves things like creating // attributes through reflection @@ -339,7 +336,7 @@ protected BytesRef indexedValueForSearch(Object value) { } /** - * redirect term query with rewrite value to rewriteSearchValue and directSubFieldName + * redirect queries with rewrite value to rewriteSearchValue and directSubFieldName */ @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { @@ -473,15 +470,11 @@ public Query wildcardQuery( boolean caseInsensitve, QueryShardContext context ) { - // flat-object field types are always normalized, so ignore case sensitivity and force normalize the wildcard + // flat_object field types are always normalized, so ignore case sensitivity and force normalize the wildcard // query text throw new QueryShardException( context, - "Can only use wildcard queries on keyword and text fields - not on [" - + name() - + "] which is of type [" - + "flat-object" - + "]" + "Can only use wildcard queries on keyword and text fields - not on [" + name() + "] which is of type [" + typeName() + "]" ); } @@ -542,6 +535,7 @@ protected void parseCreateField(ParseContext context) throws IOException { ); /** * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + * It reads the JSON object and parsed to a list of string */ XContentParser parser = JsonToStringParser.parseObject(); @@ -580,6 +574,15 @@ public Iterator iterator() { return concat; } + /** + * parseValueAddFields method will store data to Lucene. + * the JsonToStringXContentParser returns XContentParser with 3 string fields + * fieldName, fieldName._value, fieldName._valueAndPath. 
+ * parseValueAddFields identifies each string by its string field name: + * fieldName is stored through the parent FlatObjectFieldMapper, which contains all the keys; + * fieldName._value is stored through the valueFieldMapper, which contains the values of the JSON object; + * fieldName._valueAndPath is stored through the valueAndPathFieldMapper, which contains the "path=value" format. + */ private void parseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { NamedAnalyzer normalizer = fieldType().normalizer(); @@ -590,11 +593,6 @@ private void parseValueAddFields(ParseContext context, String value, String fiel String[] valueTypeList = fieldName.split("\\._"); String valueType = "._" + valueTypeList[valueTypeList.length - 1]; - /** - * the JsonToStringXContentParser returns XContentParser with 3 string fields - * fieldName, fieldName._value, fieldName._valueAndPath - */ - if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { // convert to utf8 only once before feeding postings/dv/stored fields @@ -604,13 +602,6 @@ private void parseValueAddFields(ParseContext context, String value, String fiel if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { createFieldNamesField(context); } - /** - * Indentified by the stringfield name, - * fieldName will be store through the parent FlatFieldMapper,which contains all the keys - * fieldName._value will be store through the valueFieldMapper, which contains the values of the Json Object - * fieldName._valueAndPath will be store through the valueAndPathFieldMapper, which contains the values of - * the Json Object.
- */ if (fieldName.equals(fieldType().name())) { context.doc().add(field); } diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index c1a506481fd55..75c9fd776900d 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -25,11 +25,11 @@ import static org.hamcrest.core.StringContains.containsString; public class FlatObjectFieldMapperTests extends MapperServiceTestCase { - private static final String FIELD_TYPE = "flat-object"; + private static final String FIELD_TYPE = "flat_object"; // @Override public FlatObjectFieldMapper.Builder newBuilder() { - return new FlatObjectFieldMapper.Builder("flat-object"); + return new FlatObjectFieldMapper.Builder(FIELD_TYPE); } public final void testExistsQueryDocValuesDisabledWithNorms() throws IOException { diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java index 883196d290154..0f40c8602c809 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java @@ -53,6 +53,7 @@ import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.mapper.BinaryFieldMapper; +import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; @@ -103,7 +104,8 @@ protected List unsupportedMappedFieldTypes() { 
NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all NumberFieldMapper.NumberType.FLOAT.typeName(), NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), - BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched + BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they cannot be searched + FlatObjectFieldMapper.CONTENT_TYPE // flat_object fields do not support aggregations ); } diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java index ce5c361ffcf69..e9b2d40fd4ede 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java @@ -50,6 +50,7 @@ import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.mapper.BinaryFieldMapper; +import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.TextFieldMapper; @@ -102,7 +103,8 @@ protected List getSupportedValuesSourceTypes() { protected List unsupportedMappedFieldTypes() { return Arrays.asList( BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers - GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries + GeoPointFieldMapper.CONTENT_TYPE, // geopoint fields cannot use term queries + FlatObjectFieldMapper.CONTENT_TYPE // flat_object fields do not support aggregations ); } From 70b680f32d294dcf706102ad2c961d118667812c Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Wed, 22 Mar 2023 13:50:10 -0700 Subject:
[PATCH 05/25] Organized package Signed-off-by: Mingshi Liu --- benchmarks/build.gradle | 2 -- .../common/xcontent/JsonToStringXContentParser.java | 2 +- .../org/opensearch/index/mapper/FlatObjectFieldMapper.java | 6 +++--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index 803d8777592c6..2c5456abc434f 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -40,7 +40,6 @@ archivesBaseName = 'opensearch-benchmarks' test.enabled = false dependencies { -// implementation project(":dependencies:client:opensearch:opensearch-rest-high-level-client") api( project(":server")) { // JMH ships with the conflicting version 4.6. This prevents us from using jopt-simple in benchmarks (which should be ok) but allows // us to invoke the JMH uberjar as usual. @@ -51,7 +50,6 @@ dependencies { api "org.apache.httpcomponents:httpclient:${versions.httpclient}" api "org.openjdk.jmh:jmh-core:$versions.jmh" implementation 'org.json:json:20210307' -// implementation 'org.apache.httpcomponents:httpcore5:5.1.3' annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" // Dependencies of JMH runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4' diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index a4b78c0ee575a..89f0254de44e9 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -33,7 +33,7 @@ import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.common.xcontent.support.AbstractXContentParser; +import org.opensearch.core.xcontent.*; import org.opensearch.index.mapper.ParseContext; import java.io.IOException; import java.nio.CharBuffer; diff --git 
a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index db02dd486f2ed..100c8c6b3e079 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -28,9 +28,9 @@ import org.opensearch.common.collect.Iterators; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.AutomatonQueries; -import org.opensearch.common.xcontent.DeprecationHandler; -import org.opensearch.common.xcontent.NamedXContentRegistry; -import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.common.xcontent.JsonToStringXContentParser; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; From da425e40661a1b18e9fa4a32c4fb949a7b90cf27 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Wed, 22 Mar 2023 14:36:50 -0700 Subject: [PATCH 06/25] resolved compile error Signed-off-by: Mingshi Liu --- .../benchmark/index/mapper/FlatObjectMappingBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java index 46e779a6f960f..24ee6512aa7fc 100644 --- a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java +++ b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java @@ -68,7 +68,7 @@ public void setup() throws Exception { throw new IllegalArgumentException("opensearch.uri system property not set"); } - this.client = new 
RestHighLevelClient(RestClient.builder(HttpHost.create(httpUri))); + this.client = new RestHighLevelClient(RestClient.builder(String.valueOf(HttpHost.create(httpUri)))); } From 4a7b5036219d91f6e5a4bd2373216f4d1bf1aa4e Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Wed, 22 Mar 2023 14:55:26 -0700 Subject: [PATCH 07/25] organize package Signed-off-by: Mingshi Liu --- .../common/xcontent/JsonToStringXContentParser.java | 7 ++++++- .../index/mapper/FlatObjectFieldMapperTests.java | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 89f0254de44e9..3468f4dfd6c54 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -33,7 +33,12 @@ import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.core.xcontent.*; +import org.opensearch.core.xcontent.AbstractXContentParser; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentLocation; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.mapper.ParseContext; import java.io.IOException; import java.nio.CharBuffer; diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index 75c9fd776900d..8e175f4a5cf8c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -14,8 +14,8 @@ import 
org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; import org.opensearch.common.Strings; -import org.opensearch.common.xcontent.ToXContent; -import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.json.JsonXContent; From e98f06be90b6df81ed2a3ecb7c91ca9f33b1da05 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Thu, 23 Mar 2023 16:13:51 -0700 Subject: [PATCH 08/25] Add integration tests and remove benchmark Signed-off-by: Mingshi Liu --- CHANGELOG.md | 2 +- benchmarks/build.gradle | 5 - .../mapper/FlatObjectMappingBenchmark.java | 363 ------------------ .../test/index/90_flat_object.yml | 231 +++++++++++ .../xcontent/JsonToStringXContentParser.java | 20 +- 5 files changed, 242 insertions(+), 379 deletions(-) delete mode 100644 benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e9a3e49ad02c..55ebad8d69a6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -99,7 +99,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Require MediaType in Strings.toString API ([#6009](https://github.com/opensearch-project/OpenSearch/pull/6009)) - [Refactor] XContent base classes from xcontent to core library ([#5902](https://github.com/opensearch-project/OpenSearch/pull/5902)) -- Introduce a new field type: flat_object ([#6507](https://github.com/opensearch-project/OpenSearch/pull/6507)) +- Added a new field type: flat_object ([#6507](https://github.com/opensearch-project/OpenSearch/pull/6507)) ### Deprecated - Map, List, and Set in org.opensearch.common.collect ([#6609](https://github.com/opensearch-project/OpenSearch/pull/6609)) 
diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index 2c5456abc434f..4a7825e9ba35b 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -45,16 +45,11 @@ dependencies { // us to invoke the JMH uberjar as usual. exclude group: 'net.sf.jopt-simple', module: 'jopt-simple' } - api(project(":client:rest-high-level")) - api "org.apache.httpcomponents:httpcore:${versions.httpcore}" - api "org.apache.httpcomponents:httpclient:${versions.httpclient}" api "org.openjdk.jmh:jmh-core:$versions.jmh" - implementation 'org.json:json:20210307' annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:$versions.jmh" // Dependencies of JMH runtimeOnly 'net.sf.jopt-simple:jopt-simple:5.0.4' runtimeOnly 'org.apache.commons:commons-math3:3.6.1' - } // enable the JMH's BenchmarkProcessor to generate the final benchmark classes diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java deleted file mode 100644 index 24ee6512aa7fc..0000000000000 --- a/benchmarks/src/main/java/org/opensearch/benchmark/index/mapper/FlatObjectMappingBenchmark.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.benchmark.index.mapper; - -import org.apache.http.HttpHost; -import org.json.JSONObject; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.annotations.Warmup; -import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; -import org.opensearch.action.admin.indices.refresh.RefreshRequest; -import org.opensearch.action.admin.indices.refresh.RefreshResponse; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.index.IndexResponse; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.action.support.master.AcknowledgedResponse; -import org.opensearch.benchmark.index.mapper.FlatObjectMappingBenchmark.MyState; -import org.opensearch.client.RequestOptions; -import org.opensearch.client.RestClient; -import org.opensearch.client.RestHighLevelClient; -import org.opensearch.client.indices.CreateIndexRequest; -import org.opensearch.client.indices.CreateIndexResponse; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.XContentType; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.SearchHits; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.Random; -import java.util.concurrent.TimeUnit; - -@State(Scope.Thread) -@Fork(1) 
-@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) -@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS) - -public class FlatObjectMappingBenchmark { - - @State(Scope.Thread) - public static class MyState { - private RestHighLevelClient client; - - @Setup(Level.Trial) - public void setup() throws Exception { - String httpUri = System.getProperty("opensearch.uri", "http://localhost:9200"); - if (httpUri == null || httpUri.trim().isEmpty()) { - throw new IllegalArgumentException("opensearch.uri system property not set"); - } - - this.client = new RestHighLevelClient(RestClient.builder(String.valueOf(HttpHost.create(httpUri)))); - - } - - @TearDown(Level.Trial) - public void tearDown() throws Exception { - this.client.close(); - } - } - - /** - * DynamicIndex: - * create index and delete index - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void CreateDynamicIndex(MyState state) throws IOException { - GetDynamicIndex(state, "demo-dynamic-test"); - DeleteIndex(state, "demo-dynamic-test"); - } - - /** - * FlatObjectIndex: - * create index and delete index - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void CreateFlatObjectIndex(MyState state) throws IOException { - GetFlatObjectIndex(state, "demo-flat-object-test", "host"); - DeleteIndex(state, "demo-flat-object-test"); - } - - /** - * DynamicIndex: - * create index, upload one document and delete index - */ - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void indexDynamicMapping(MyState state) throws IOException { - GetDynamicIndex(state, "demo-dynamic-test1"); - String doc = - "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", 
\"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; - UploadDoc(state, "demo-dynamic-test1", doc); - DeleteIndex(state, "demo-dynamic-test1"); - } - - /** - * FlatObjectIndex: - * create index, upload one document and delete index - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void indexFlatObjectMapping(MyState state) throws IOException, URISyntaxException { - GetFlatObjectIndex(state, "demo-flat-object-test1", "host"); - String doc = - "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; - UploadDoc(state, "demo-flat-object-test1", doc); - DeleteIndex(state, "demo-flat-object-test1"); - } - - /** - * DynamicIndex: - * create index, upload one document, search for document and delete index - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void searchDynamicMapping(MyState state) throws IOException { - String indexName = "demo-dynamic-test2"; - GetDynamicIndex(state, indexName); - String doc = - "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; - UploadDoc(state, indexName, doc); - SearchDoc(state, indexName, "host.hostname", "bionic", "@timestamp", "message"); - DeleteIndex(state, indexName); - } - - /** - * FlatObjectIndex: - * create index, upload one document, search for document and delete index - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void 
searchFlatObjectMapping(MyState state) throws IOException { - GetFlatObjectIndex(state, "demo-flat-object-test2", "host"); - String doc = - "{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }"; - UploadDoc(state, "demo-flat-object-test2", doc); - SearchDoc(state, "demo-flat-object-test2", "host.hostname", "name", "@timestamp", "message"); - DeleteIndex(state, "demo-flat-object-test2"); - } - - /** - * DynamicIndex: - * create index, upload a nested document in 100 levels, and each level with 10 fields, - * search for document and delete index - * Caught exceptions with the number of fields over 1000 - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException { - String indexName = "demo-dynamic-test3"; - GetDynamicIndex(state, indexName); - String doc = GenerateRandomJson(10, "nested"); - Map searchValueAndPath = findNestedValueAndPath(doc, 26, ""); - String searchValue = searchValueAndPath.get("value"); - String searchFieldName = searchValueAndPath.get("path"); - UploadDoc(state, indexName, doc); - SearchDoc(state, indexName, searchFieldName, searchValue, searchFieldName, searchFieldName); - DeleteIndex(state, indexName); - } - - /** - * debug search in dotpath - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public void searchFlatObjectMappingInValueWithOneHundredNestedJSON(MyState state) throws IOException { - String indexName = "demo-flat-object-test4"; - GetFlatObjectIndex(state, indexName, "nested0"); - String doc = GenerateRandomJson(10, "nested"); - Map searchValueAndPath = findNestedValueAndPath(doc, 26, ""); - String 
SearchRandomWord = searchValueAndPath.get("value"); - String SearchRandomPath = searchValueAndPath.get("path"); - String searchFieldName = "nested0"; - UploadDoc(state, indexName, doc); - SearchDoc(state, indexName, SearchRandomPath, SearchRandomWord, searchFieldName, searchFieldName); - DeleteIndex(state, indexName); - } - - private static void GetDynamicIndex(MyState state, String indexName) throws IOException { - CreateIndexRequest dynamicRequest = new CreateIndexRequest(indexName); - CreateIndexResponse dynamicResponse = state.client.indices().create(dynamicRequest, RequestOptions.DEFAULT); - if (!dynamicResponse.isAcknowledged()) { - System.out.println("Failed to create index"); - } - } - - private static void GetFlatObjectIndex(MyState state, String indexName, String flatObjectFieldName) throws IOException { - CreateIndexRequest flatRequest = new CreateIndexRequest(indexName).mapping( - XContentFactory.jsonBuilder() - .startObject() - .startObject("properties") - .startObject(flatObjectFieldName) - .field("type", "flat_object") - .endObject() - .endObject() - .endObject() - ); - - CreateIndexResponse flatResponse = state.client.indices().create(flatRequest, RequestOptions.DEFAULT); - - if (flatResponse.isAcknowledged()) {} else { - System.out.println("Failed to create index"); - } - } - - private static void DeleteIndex(MyState state, String indexName) throws IOException { - DeleteIndexRequest dynamicDeleteRequest = new DeleteIndexRequest(indexName); - AcknowledgedResponse dynamicDeleteResponse = state.client.indices().delete(dynamicDeleteRequest, RequestOptions.DEFAULT); - if (dynamicDeleteResponse.isAcknowledged()) {} else { - System.out.println("Failed to delete index"); - } - } - - private static void UploadDoc(MyState state, String indexName, String doc) throws IOException { - IndexRequest request = new IndexRequest(indexName); - request.source(doc, XContentType.JSON); - IndexResponse indexResponse = state.client.index(request, RequestOptions.DEFAULT); - 
if (!indexResponse.status().toString().equals("CREATED")) { - System.out.println("Index status is " + indexResponse.status()); - } else { - - } - } - - private static void SearchDoc( - MyState state, - String indexName, - String searchFieldName, - String searchText, - String sortFieldName, - String highlightFieldName - ) throws IOException { - // Refresh the index before searching - RefreshRequest refreshRequest = new RefreshRequest(indexName); - RefreshResponse refreshResponse = state.client.indices().refresh(refreshRequest, RequestOptions.DEFAULT); - if (!refreshResponse.getStatus().toString().equals("OK")) { - System.out.println("refreshResponse: " + refreshResponse.getStatus()); - } - - SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(QueryBuilders.matchQuery(searchFieldName, searchText)); - sourceBuilder.from(0); - sourceBuilder.size(10); - SearchRequest searchRequest = new SearchRequest(indexName); - searchRequest.source(sourceBuilder); - SearchResponse SearchResponse = state.client.search(searchRequest, RequestOptions.DEFAULT); - if (!SearchResponse.status().toString().equals("OK")) { - SearchHits hits = SearchResponse.getHits(); - long totalHits = hits.getTotalHits().value; - if (totalHits == 0) { - throw new IOException("No hit is found"); - } - } - } - - private static String GenerateRandomJson(int numberOfNestedLevel, String subObjectName) { - JSONObject json = new JSONObject(); - Random random = new Random(); - - // Create nested levels - - for (int i = 0; i < numberOfNestedLevel; i++) { - JSONObject nestedObject = new JSONObject(); - - // Add 10 fields to each nested level - for (int j = 0; j < 10; j++) { - String field = "field" + i + j; - String value = generateRandomString(random); - nestedObject.put(field, value); - } - - // Add the nested object to the parent object - String nestedObjectName = subObjectName + i; - json.put(nestedObjectName, nestedObject); - } - - // Return the JSON document as a string - 
JSONObject returnJson = new JSONObject(); - returnJson.put(subObjectName + "0", json); - return returnJson.toString(); - } - - private static String generateRandomString(Random random) { - String alphabet = "abcdefghijklmnopqrstuvwxyz"; - int length = 10; - StringBuilder randomString = new StringBuilder(); - for (int i = 0; i < length; i++) { - randomString.append(alphabet.charAt(random.nextInt(alphabet.length()))); - randomString.append(random.nextInt(10)); - } - return randomString.toString(); - } - - private static Map findNestedValueAndPath(String randomJsonString, int levelNumber, String currentPath) { - JSONObject jsonObject = new JSONObject(randomJsonString); - String targetKey = "field" + levelNumber; - Map result = new HashMap<>(); - Iterator keys = jsonObject.keys(); - while (keys.hasNext()) { - String key = keys.next(); - Object value = jsonObject.get(key); - if (key.equals(targetKey)) { - result.put("value", value.toString()); - if (currentPath.length() == 0) { - currentPath = key; - } - result.put("path", currentPath + "." + key); - return result; - } - - if (value instanceof JSONObject) { - if (currentPath.length() == 0) { - currentPath = key; - } else { - if (currentPath.contains(".") && currentPath.split("\\.").length > 1) { - int pathLength = currentPath.split("\\.").length; - currentPath = "nested0." + key; - } else { - currentPath = currentPath + "." 
+ key; - } - - } - Map nestedResult = findNestedValueAndPath(value.toString(), levelNumber, currentPath); - if (!nestedResult.isEmpty()) { - return nestedResult; - } - } - } - - return result; - } - -} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml new file mode 100644 index 0000000000000..6595c1944c164 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -0,0 +1,231 @@ +--- +# Create flat_object mapping +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + ISBN13: + type : "keyword" + catalog: + type : "flat-object" + +# Delete Index when connection is teardown +--- +teardown: + - do: + indices.delete: + index: test + +# Upload a sample document to index +--- +"Flat object test": + - do: + index: + index: test + id: 1 + body: { + "ISBN13": "V9781933988177", + "catalog": { + "title": "Lucene in Action", + "author": + { + "surname": "McCandless", + "given": "Mike" + }, + "catalogId":"c-0002" + } + } + + # Do index refresh + - do: + indices.refresh: + index: test + + # Verify that mapping under the catalog field did not expand. + # Verify that there are no dynamic fields created. + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - do: + indices.get_mapping: + index: test + - is_true: test.mappings + - match: { test.mappings.properties.ISBN13.type: keyword } + - match: { test.mappings.properties.catalog.type: flat-object } + - length: { test.mappings.properties: 2 } + - length: { test.mappings.properties.catalog: 1 } + + # Verify Document Count + - do: + search: + body: { + query: { + match_all: {} + } + } + + - length: { hits.hits: 1 } + + # Match Query with dot path. 
+ - do: + search: + body: { + _source: true, + query: { + match: { catalog.title: "Lucene in Action"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } + + # Match Query without dot path. + - do: + search: + body: { + _source: true, + query: { + match: { catalog: "Lucene in Action"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } + + # Term Query with dot path + - do: + search: + body: { + _source: true, + query: { + term: { catalog.title: "Lucene in Action"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } + + # Term Query with dot path. + - do: + search: + body: { + _source: true, + query: { + term: { catalog.author.given: "Mike" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Term Query without dot path. + - do: + search: + body: { + _source: true, + query: { + term: { catalog: "Mike" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Prefix Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "prefix": { + "catalog.author.given": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Prefix Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "prefix": { + "catalog": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Range Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog.catalogId": { + "gte": "c-0000", + "lte": "c-0006" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.catalogId: "c-0002" } + + # Range Query without dot path. 
+ - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog": { + "gte": "c-0000", + "lte": "c-0006" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.catalogId: "c-0002" } + + # Exists Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": catalog.catalogId + } + } + } + + - length: { hits.hits: 1 } + + # Exists Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": catalog + } + } + } + + - length: { hits.hits: 1 } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 3468f4dfd6c54..b8be530f00718 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -86,7 +86,8 @@ public JsonToStringXContentParser( public XContentParser parseObject() throws IOException { builder.startObject(); - parseToken(); + StringBuilder path = new StringBuilder(fieldTypeName); + parseToken(path); builder.field(this.fieldTypeName, keyList); builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); @@ -95,27 +96,26 @@ public XContentParser parseObject() throws IOException { return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } - private void parseToken() throws IOException { + private void parseToken(StringBuilder path) throws IOException { String currentFieldName; while (this.parser.nextToken() != Token.END_OBJECT) { - currentFieldName = this.parser.currentName(); StringBuilder parsedFields = new StringBuilder(); - StringBuilder path = new StringBuilder(fieldTypeName); if (this.parser.nextToken() == Token.START_OBJECT) { - // 
TODO: to convert the entire JsonObject as string without changing the tokenizer position. + // TODO: consider to store the entire JsonObject at StartObject as string without changing the tokenizer position. path.append(DOT_SYMBOL + currentFieldName); - parsedFields.append(this.parser.toString()); this.keyList.add(currentFieldName); - this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString()); - parseToken(); + parseToken(path); } else { path.append(DOT_SYMBOL + currentFieldName); parseValue(currentFieldName, parsedFields); this.keyList.add(currentFieldName); this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString()); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); + int dotIndex = path.lastIndexOf(DOT_SYMBOL); + if (dotIndex != -1) { + path.delete(dotIndex, path.length()); + } } } From bdd5e29395687530a14ea4f338111ab59fb0de75 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Thu, 23 Mar 2023 19:59:35 -0700 Subject: [PATCH 09/25] fix IT tests Signed-off-by: Mingshi Liu --- .../rest-api-spec/test/index/90_flat_object.yml | 12 ++++++------ .../common/xcontent/JsonToStringXContentParser.java | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index 6595c1944c164..017843930527f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -10,7 +10,7 @@ setup: ISBN13: type : "keyword" catalog: - type : "flat-object" + type : "flat_object" # Delete Index when connection is teardown --- @@ -52,7 +52,7 @@ teardown: index: test - is_true: test.mappings - match: { test.mappings.properties.ISBN13.type: keyword } - - match: { 
test.mappings.properties.catalog.type: flat-object } + - match: { test.mappings.properties.catalog.type: flat_object } - length: { test.mappings.properties: 2 } - length: { test.mappings.properties.catalog: 1 } @@ -73,7 +73,7 @@ teardown: body: { _source: true, query: { - match: { catalog.title: "Lucene in Action"} + match: { "catalog.title": "Lucene in Action"} } } @@ -93,7 +93,7 @@ teardown: - length: { hits.hits: 1 } - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } - # Term Query with dot path + # Term Query1 with dot path - do: search: body: { @@ -106,13 +106,13 @@ teardown: - length: { hits.hits: 1 } - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } - # Term Query with dot path. + # Term Query2 with dot path. - do: search: body: { _source: true, query: { - term: { catalog.author.given: "Mike" } + term: { "catalog.author.given": "Mike" } } } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index b8be530f00718..0db2e5c66b177 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -106,6 +106,10 @@ private void parseToken(StringBuilder path) throws IOException { path.append(DOT_SYMBOL + currentFieldName); this.keyList.add(currentFieldName); parseToken(path); + int dotIndex = path.lastIndexOf(DOT_SYMBOL); + if (dotIndex != -1) { + path.delete(dotIndex, path.length()); + } } else { path.append(DOT_SYMBOL + currentFieldName); parseValue(currentFieldName, parsedFields); From 1753062e8b94274e8a0a7b6cdfe1b0498311991d Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Thu, 23 Mar 2023 21:26:16 -0700 Subject: [PATCH 10/25] Skip IT tests before 2.7.0 Signed-off-by: Mingshi Liu --- .../main/resources/rest-api-spec/test/index/90_flat_object.yml | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index 017843930527f..13a6e94f8d24a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -22,6 +22,9 @@ teardown: # Upload a sample document to index --- "Flat object test": + - skip: + version: " 2.7.0 - " + reason: "flat-object is introduced in 2.7.0" - do: index: index: test From 0a5d0b9984b2e20bb1593a586fcd643b2b75398d Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Thu, 23 Mar 2023 21:54:16 -0700 Subject: [PATCH 11/25] Revert "Skip IT tests before 2.7.0" Signed-off-by: Mingshi Liu --- .../resources/rest-api-spec/test/index/90_flat_object.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index 13a6e94f8d24a..b638686d2dedd 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -23,8 +23,8 @@ teardown: --- "Flat object test": - skip: - version: " 2.7.0 - " - reason: "flat-object is introduced in 2.7.0" + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" - do: index: index: test From cdf6797de8930613e187101e3032285dee76df19 Mon Sep 17 00:00:00 2001 From: Mingshi Liu Date: Mon, 27 Mar 2023 22:24:33 -0700 Subject: [PATCH 12/25] Add more IT tests for supported queries Signed-off-by: Mingshi Liu --- .idea/runConfigurations/Debug_OpenSearch.xml | 6 +- .../test/index/90_flat_object.yml | 185 +++++++++++++++++- .../xcontent/JsonToStringXContentParser.java | 5 +- .../index/mapper/FlatObjectFieldMapper.java | 30 +-- 
.../mapper/FlatObjectFieldMapperTests.java | 7 +- 5 files changed, 200 insertions(+), 33 deletions(-) diff --git a/.idea/runConfigurations/Debug_OpenSearch.xml b/.idea/runConfigurations/Debug_OpenSearch.xml index 0d8bf59823acf..c18046f873477 100644 --- a/.idea/runConfigurations/Debug_OpenSearch.xml +++ b/.idea/runConfigurations/Debug_OpenSearch.xml @@ -6,6 +6,10 @@