diff --git a/CHANGELOG.md b/CHANGELOG.md index bb552b6ffb70a..2ac42429471b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Require MediaType in Strings.toString API ([#6009](https://github.com/opensearch-project/OpenSearch/pull/6009)) - [Refactor] XContent base classes from xcontent to core library ([#5902](https://github.com/opensearch-project/OpenSearch/pull/5902)) +- Added a new field type: flat_object ([#6507](https://github.com/opensearch-project/OpenSearch/pull/6507)) ### Deprecated - Map, List, and Set in org.opensearch.common.collect ([#6609](https://github.com/opensearch-project/OpenSearch/pull/6609)) diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml index a006fde630716..b360d8dc01ccf 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml @@ -482,3 +482,100 @@ }] - match: { error.root_cause.0.type: "illegal_argument_exception" } - match: { error.root_cause.0.reason: "script score function must not produce negative scores, but got: [-9.0]"} + +--- + +"Flat-object fields from within the scripting": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.create: + index: test + body: + mappings: + properties: + flat: + type : "flat_object" + + # This document has 6 distinct parts in its flat_object field paths: + # - flat.field_1 + # - flat.field_2 + # - flat.field_3 + # - flat.inner + # - flat.field_A + # - flat.field_B + - do: + index: + index: test + id: 1 + body: { + "flat": { + "field_1": "John Doe", + "field_2": 33, + "field_3": false, + "inner": { + "field_A": ["foo", "bar"], + "field_B": false + } + } + } + + - do: + index: + index: test + id: 2 + body: { + "flat": { + "field_1": "Joe Public", + "field_2": 45 + } + } + + - do: + indices.refresh: + index: test + + # It is possible to filter based on the number of distinct parts of flat_object field paths + - do: + search: + body: { + _source: true, + query: { + bool: { + filter: { + script: { + script: { + source: "doc['flat'].size() == 6", + lang: "painless" + } + } + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.flat.field_1: "John Doe" } + + - do: + search: + body: { + _source: true, + query: { + bool: { + filter: { + script: { + script: { + source: "doc['flat'].size() < 6", + lang: "painless" + } + } + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.flat.field_1: "Joe Public" } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml new file mode 100644 index 0000000000000..88cb2f1716c9b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -0,0 +1,746 @@ +--- +# The initial test setup includes: +# - Create flat_object mapping +# - Index two example documents +# - Refresh the index so it is ready for search tests +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + ISBN13: + type : "keyword" + catalog: + type : "flat_object" + required_matches: + type : "long" + + - do: + index: + index: test + id: 1 + body: { + 
"ISBN13": "V9781933988177", + "catalog": { + "title": "Lucene in Action", + "author": + { + "surname": "McCandless", + "given": "Mike" + }, + "catalogId":"c-0002", + "quantity": 1234, + "rating": 9.2, + "location": [-81.7982,41.3847 ], + "review": [["great",99.8],["ok",80.0]], + "publishDate": "2015-01-01" + }, + "required_matches": 1 + } + + - do: + index: + index: test + id: 2 + body: { + "ISBN13": "V12154942129175", + "catalog": { + "title": "Mock in Action", + "author": + { + "surname": "Doe", + "given": "John" + }, + "catalogId": "c-0050", + "quantity": 4321, + "rating": 5.2, + "location": [-12.7982,33.3847 ], + "review": [["bad",30.41],["ok",80.0]], + "publishDate": "2016-01-01" + }, + "required_matches": 1 + } + + # Do index refresh + - do: + indices.refresh: + index: test + +--- +# Delete Index when connection is teardown +teardown: + - do: + indices.delete: + index: test + +--- +# Verify that mappings under the catalog field did not expand +# and no dynamic fields were created. +"Mappings": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.get_mapping: + index: test + - is_true: test.mappings + - match: { test.mappings.properties.ISBN13.type: keyword } + - match: { test.mappings.properties.catalog.type: flat_object } + - match: { test.mappings.properties.required_matches.type: long } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { test.mappings.properties: 3 } + - length: { test.mappings.properties.catalog: 1 } + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + # Verify Document Count + - do: + search: + body: { + query: { + match_all: {} + } + } + + - length: { hits.hits: 2 } + + # Match Query with dot path. + - do: + search: + body: { + _source: true, + query: { + match: { "catalog.title": "Lucene in Action"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } + + # Match Query without dot path. + - do: + search: + body: { + _source: true, + query: { + match: { catalog: "Lucene in Action"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.title: "Lucene in Action" } + + + # Multi Match Query without dot path. + - do: + search: + body: { + _source: true, + query: { + multi_match: { + "query": "Mike", + "fields": [ "ISBN13", "catalog" ] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Multi Match Query with dot path. 
+ - do: + search: + body: { + _source: true, + query: { + multi_match: { + "query": "Mike", + "fields": [ "ISBN13", "catalog.author.given" ] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Term Query1 with dot path for date + - do: + search: + body: { + _source: true, + query: { + term: { catalog.publishDate: "2015-01-01"} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.publishDate: "2015-01-01" } + + # Term Query1 without dot path for date + - do: + search: + body: { + _source: true, + query: { + term: { catalog: "2015-01-01" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.publishDate: "2015-01-01" } + + # Term Query2 with dot path for string + - do: + search: + body: { + _source: true, + query: { + term: { "catalog.author.given": "Mike" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Term Query2 without dot path. + - do: + search: + body: { + _source: true, + query: { + term: { catalog: "Mike" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Term Query3 with dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { catalog.location: "-12.7982" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.location: [-12.7982,33.3847 ]} + + # Term Query3 without dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { catalog: "-12.7982" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.location: [-12.7982,33.3847 ]} + + + # Term Query4 with dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + term: { catalog.review: "99.8" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.review: [ [ "great",99.8 ],[ "ok",80.0 ] ] } + + # Term Query4 without dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + term: { catalog: "99.8" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.review: [["great",99.8],["ok",80.0]] } + + # Terms Query without dot path. + - do: + search: + body: { + _source: true, + query: { + terms: { catalog: ["John","Mike"] } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Terms Query with dot path. + - do: + search: + body: { + _source: true, + query: { + terms: { catalog.author.given: ["John","Mike"] } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Termset Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "terms_set": { + "catalog": { + "terms": [ "John","Mike" ], + "minimum_should_match_field": "required_matches"} + } + } + } + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Termset Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "terms_set": { + "catalog.author.given": { + "terms": [ "John","Mike" ], + "minimum_should_match_field": "required_matches"} + } + } + } + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Prefix Query with dot path. 
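+  # Prefix queries use the same rewrite (the prefix "Mi" becomes "catalog.author.given=Mi" against catalog._valueAndPath) and are rejected with an exception when search.allow_expensive_queries is set to false.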
+ - do: + search: + body: { + _source: true, + query: { + "prefix": { + "catalog.author.given": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Prefix Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "prefix": { + "catalog": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Range Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog.catalogId": { + "gte": "c-0000", + "lte": "c-0006" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.catalogId: "c-0002" } + + # Range Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog": { + "gte": "c-0000", + "lte": "c-0006" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.catalogId: "c-0002" } + + # Range Query with integer input with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog.quantity": { + "gte": 1000, + "lte": 2000 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.quantity: 1234 } + + # Range Query with integer input without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog": { + "gte": 1000, + "lte": 2000 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.quantity: 1234 } + + # Range Query with date input with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog.publishDate": { + "gte": "2015-01-01", + "lte": "2015-12-31" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.publishDate: "2015-01-01" } + + # Range Query with date input without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog": { + "gte": "2015-01-01", + "lte": "2015-12-31" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.publishDate: "2015-01-01" } + + # Range Query with double input with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog.location": { + "gte": 40.1234, + "lte": 42.1234 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.location: [-81.7982,41.3847] } + + # Range Query with double input without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "catalog": { + "gte": 40.1234, + "lte": 42.1234 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.location: [ -81.7982,41.3847 ] } + + # Exists Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": catalog.catalogId + } + } + } + + - length: { hits.hits: 2 } + + # Exists Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": catalog + } + } + } + + - length: { hits.hits: 2 } + + # Query_string Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "query_string": { + "fields": [ "catalog", "ISBN13" ], + "query": "John OR Mike" + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Query_string Query with dot path. 
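+  # query_string and simple_query_string accept flat_object fields in their field lists; each listed field is resolved through the same _value / _valueAndPath routing described above.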
+ - do: + search: + body: { + _source: true, + query: { + "query_string": { + "fields": [ "catalog.author.given", "ISBN13" ], + "query": "John OR Mike" + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.catalog.author.given: "Mike" } + + # Simple_query_string Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "simple_query_string" : { + "query": "Doe", + "fields": ["catalog", "ISBN13"] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.surname: "Doe" } + + + # Simple_query_string Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "simple_query_string": { + "query": "Doe", + "fields": [ "catalog.author.surname", "ISBN13" ] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.catalog.author.surname: "Doe" } + +--- +"Unsupported": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + # Mapping parameters (such as index/search analyzers) are currently not supported + # The plan is to support them in the next version + - do: + catch: bad_request + indices.create: + index: test_analyzer + body: + mappings: + properties: + data: + type : "flat_object" + analyzer: "standard" + + - match: { error.root_cause.0.type: "mapper_parsing_exception" } + - match: { error.root_cause.0.reason: "Mapping definition for [data] has unsupported parameters: [analyzer : standard]"} + - match: { status: 400 } + + # Wildcard Query with dot path. + - do: + catch: bad_request + search: + body: { + _source: true, + query: { + "wildcard": { + "catalog.title": "Mock*" + } + } + } + - match: { error.root_cause.0.type: "query_shard_exception" } + - match: { error.root_cause.0.reason: "Can only use wildcard queries on keyword and text fields - not on [catalog.title] which is of type [flat_object]"} + - match: { status: 400 } + + # Wildcard Query without dot path. + - do: + catch: bad_request + search: + body: { + _source: true, + query: { + "wildcard": { + "catalog": "Mock*" + } + } + } + - match: { error.root_cause.0.type: "query_shard_exception" } + - match: { error.root_cause.0.reason: "Can only use wildcard queries on keyword and text fields - not on [catalog] which is of type [flat_object]" } + - match: { status: 400 } + + # Aggregation and Match Query with dot path. + - do: + catch: bad_request + search: + body: { + _source: true, + size: 0, + query: { + "match": { + "ISBN13": "V9781933988177" + } + }, + aggs: { + "avg_rating": { + "avg": { + "field": "catalog.rating" + } + } + } + } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Field [catalog.rating] of type [flat_object] is not supported for aggregation [avg]" } + - match: { status: 400 } + + # Aggregation using average and Match Query with dot path. + - do: + catch: bad_request + search: + body: { + _source: true, + size: 0, + query: { + "match": { + "ISBN13": "V9781933988177" + } + }, + aggs: { + "avg_rating": { + "avg": { + "field": "catalog.rating" + } + } + } + } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Field [catalog.rating] of type [flat_object] is not supported for aggregation [avg]" } + - match: { status: 400 } + + # Aggregation using geolocation and Match Query with dot path. 
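+  # geo_distance is rejected for the same reason as avg above: flat_object exposes only keyword-style (sorted-set) doc values, not numeric or geo_point values.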
+ - do: + catch: bad_request + search: + body: { + _source: true, + size: 0, + query: { + "match": { + "ISBN13": "V9781933988177" + } + }, + aggs: { + "books_in_location": { + "geo_distance": { + "field": "catalog.location", + "origin": "41.3847,-81.7982", + "unit": "km", + "ranges": [ + { + "to": 100 + } + ] + }, + "aggs": { + "total_books": { + "sum": { + "field": "catalog.quantity" + } + } + } + } + } + } + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: "Field [catalog.location] of type [flat_object] is not supported for aggregation [geo_distance]" } + - match: { status: 400 } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java index ca6d7bc0562d2..8f0b98fd1c19e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fields/SearchFieldsIT.java @@ -862,6 +862,9 @@ public void testDocValueFields() throws Exception { .startObject("ip_field") .field("type", "ip") .endObject() + .startObject("flat_object_field") + .field("type", "flat_object") + .endObject() .endObject() .endObject() .endObject() @@ -886,6 +889,10 @@ public void testDocValueFields() throws Exception { .field("boolean_field", true) .field("binary_field", new byte[] { 42, 100 }) .field("ip_field", "::1") + .field("flat_object_field") + .startObject() + .field("foo", "bar") + .endObject() .endObject() ) .get(); @@ -905,7 +912,8 @@ public void testDocValueFields() throws Exception { .addDocValueField("date_field") .addDocValueField("boolean_field") .addDocValueField("binary_field") - .addDocValueField("ip_field"); + .addDocValueField("ip_field") + .addDocValueField("flat_object_field"); SearchResponse searchResponse = builder.get(); assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); @@ -926,11 +934,14 @@ public void testDocValueFields() throws Exception { "text_field", "keyword_field", "binary_field", - "ip_field" + "ip_field", + "flat_object_field" ) ) ); - + String json = Strings.toString( + XContentFactory.jsonBuilder().startObject().startObject("flat_object_field").field("foo", "bar").endObject().endObject() + ); assertThat(searchResponse.getHits().getAt(0).getFields().get("byte_field").getValue().toString(), equalTo("1")); assertThat(searchResponse.getHits().getAt(0).getFields().get("short_field").getValue().toString(), equalTo("2")); assertThat(searchResponse.getHits().getAt(0).getFields().get("integer_field").getValue(), equalTo((Object) 3L)); @@ -946,6 +957,7 @@ public void testDocValueFields() throws Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); + assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); builder = client().prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field"); searchResponse = builder.get(); @@ -968,7 +980,8 @@ public void testDocValueFields() throws Exception { "text_field", "keyword_field", "binary_field", - "ip_field" + "ip_field", + "flat_object_field" ) ) ); @@ -988,6 +1001,7 @@ public void testDocValueFields() throws 
Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); + assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); builder = client().prepareSearch() .setQuery(matchAllQuery()) @@ -1002,7 +1016,9 @@ public void testDocValueFields() throws Exception { .addDocValueField("date_field", "use_field_mapping") .addDocValueField("boolean_field", "use_field_mapping") .addDocValueField("binary_field", "use_field_mapping") - .addDocValueField("ip_field", "use_field_mapping"); + .addDocValueField("ip_field", "use_field_mapping") + .addDocValueField("flat_object_field", "use_field_mapping"); + ; searchResponse = builder.get(); assertThat(searchResponse.getHits().getTotalHits().value, equalTo(1L)); @@ -1023,7 +1039,8 @@ public void testDocValueFields() throws Exception { "text_field", "keyword_field", "binary_field", - "ip_field" + "ip_field", + "flat_object_field" ) ) ); @@ -1043,6 +1060,7 @@ public void testDocValueFields() throws Exception { assertThat(searchResponse.getHits().getAt(0).getFields().get("keyword_field").getValue(), equalTo("foo")); assertThat(searchResponse.getHits().getAt(0).getFields().get("binary_field").getValue(), equalTo("KmQ")); assertThat(searchResponse.getHits().getAt(0).getFields().get("ip_field").getValue(), equalTo("::1")); + assertThat(searchResponse.getHits().getAt(0).getFields().get("flat_object_field").getValue(), equalTo("flat_object_field.foo")); builder = client().prepareSearch() .setQuery(matchAllQuery()) diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java new file mode 100644 index 0000000000000..71a2381c24f67 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -0,0 +1,257 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.common.xcontent; + +import org.opensearch.common.bytes.BytesReference; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.xcontent.AbstractXContentParser; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentLocation; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.ParseContext; +import java.io.IOException; +import java.nio.CharBuffer; +import java.util.ArrayList; + +/** + * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + * returns XContentParser with one parent field and subfields + * fieldName, fieldName._value, fieldName._valueAndPath + * @opensearch.internal + */ +public class JsonToStringXContentParser extends AbstractXContentParser { + private final String fieldTypeName; + private XContentParser parser; + + private ArrayList valueList = new ArrayList<>(); + private ArrayList valueAndPathList = new ArrayList<>(); + private ArrayList keyList = new ArrayList<>(); + + private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent); + private ParseContext parseContext; + + private NamedXContentRegistry xContentRegistry; + + private DeprecationHandler deprecationHandler; + + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; + private static final String VALUE_SUFFIX = "._value"; + private static final String DOT_SYMBOL = "."; + private static final String EQUAL_SYMBOL = "="; + + public JsonToStringXContentParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + ParseContext parseContext, + String fieldTypeName + ) throws IOException { + super(xContentRegistry, deprecationHandler); + this.parseContext = parseContext; + this.deprecationHandler = deprecationHandler; + this.xContentRegistry = xContentRegistry; + this.parser = parseContext.parser(); + this.fieldTypeName = fieldTypeName; + } + + public XContentParser parseObject() throws IOException { + builder.startObject(); + StringBuilder path = new StringBuilder(fieldTypeName); + parseToken(path, null); + builder.field(this.fieldTypeName, keyList); + builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); + builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); + builder.endObject(); + String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON); + return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); + } + + private void parseToken(StringBuilder path, String currentFieldName) throws IOException { + + while (this.parser.nextToken() != Token.END_OBJECT) { + if (this.parser.currentName() != null) { + currentFieldName = this.parser.currentName(); + } + StringBuilder parsedFields = new StringBuilder(); + + if (this.parser.currentToken() == Token.FIELD_NAME) { + path.append(DOT_SYMBOL + currentFieldName); + this.keyList.add(currentFieldName); + } else if (this.parser.currentToken() == Token.START_ARRAY) { + parseToken(path, currentFieldName); + break; + } else if (this.parser.currentToken() == Token.END_ARRAY) { + // skip + } else if (this.parser.currentToken() == Token.START_OBJECT) { + parseToken(path, currentFieldName); + int dotIndex = path.lastIndexOf(DOT_SYMBOL); + if (dotIndex != -1) { + path.delete(dotIndex, 
path.length()); + } + } else { + if (!path.toString().contains(currentFieldName)) { + path.append(DOT_SYMBOL + currentFieldName); + } + parseValue(parsedFields); + this.valueList.add(parsedFields.toString()); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); + int dotIndex = path.lastIndexOf(DOT_SYMBOL); + if (dotIndex != -1) { + path.delete(dotIndex, path.length()); + } + } + + } + } + + private void parseValue(StringBuilder parsedFields) throws IOException { + switch (this.parser.currentToken()) { + case VALUE_BOOLEAN: + case VALUE_NUMBER: + case VALUE_STRING: + case VALUE_NULL: + parsedFields.append(this.parser.textOrNull()); + break; + // Handle other token types as needed + case FIELD_NAME: + case VALUE_EMBEDDED_OBJECT: + case END_ARRAY: + case START_ARRAY: + break; + default: + throw new IOException("Unsupported token type [" + parser.currentToken() + "]"); + } + } + + @Override + public XContentType contentType() { + return XContentType.JSON; + } + + @Override + public Token nextToken() throws IOException { + return this.parser.nextToken(); + } + + @Override + public void skipChildren() throws IOException { + this.parser.skipChildren(); + } + + @Override + public Token currentToken() { + return this.parser.currentToken(); + } + + @Override + public String currentName() throws IOException { + return this.parser.currentName(); + } + + @Override + public String text() throws IOException { + return this.parser.text(); + } + + @Override + public CharBuffer charBuffer() throws IOException { + return this.parser.charBuffer(); + } + + @Override + public Object objectText() throws IOException { + return this.parser.objectText(); + } + + @Override + public Object objectBytes() throws IOException { + return this.parser.objectBytes(); + } + + @Override + public boolean hasTextCharacters() { + return this.parser.hasTextCharacters(); + } + + @Override + public char[] textCharacters() throws IOException { + return this.parser.textCharacters(); + } + + @Override + public int textLength() throws IOException { + return this.parser.textLength(); + } + + @Override + public int textOffset() throws IOException { + return this.parser.textOffset(); + } + + @Override + public Number numberValue() throws IOException { + return this.parser.numberValue(); + } + + @Override + public NumberType numberType() throws IOException { + return this.parser.numberType(); + } + + @Override + public byte[] binaryValue() throws IOException { + return this.parser.binaryValue(); + } + + @Override + public XContentLocation getTokenLocation() { + return this.parser.getTokenLocation(); + } + + @Override + protected boolean doBooleanValue() throws IOException { + return this.parser.booleanValue(); + } + + @Override + protected short doShortValue() throws IOException { + return this.parser.shortValue(); + } + + @Override + protected int doIntValue() throws IOException { + return this.parser.intValue(); + } + + @Override + protected long doLongValue() throws IOException { + return this.parser.longValue(); + } + + @Override + protected float doFloatValue() throws IOException { + return this.parser.floatValue(); + } + + @Override + protected double doDoubleValue() throws IOException { + return this.parser.doubleValue(); + } + + @Override + public boolean isClosed() { + return this.parser.isClosed(); + } + + @Override + public void close() throws IOException { + this.parser.close(); + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/DynamicKeyFieldMapper.java 
b/server/src/main/java/org/opensearch/index/mapper/DynamicKeyFieldMapper.java index 71f4c312a8c58..94bc4806ba0e0 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DynamicKeyFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DynamicKeyFieldMapper.java @@ -49,9 +49,6 @@ * sure to passes an empty multi-fields list to help prevent conflicting sub-keys from being * registered. * - * Note: we anticipate that 'flattened' fields will be the only implementation of this - * interface. Flattened object fields live in the 'mapper-flattened' module. - * * @opensearch.internal */ public abstract class DynamicKeyFieldMapper extends FieldMapper { diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java new file mode 100644 index 0000000000000..e0b37df5c1734 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -0,0 +1,760 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; +import org.opensearch.OpenSearchException; +import org.opensearch.Version; +import org.opensearch.common.Nullable; +import org.opensearch.common.collect.Iterators; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.search.AutomatonQueries; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.QueryShardException; +import org.opensearch.search.aggregations.support.CoreValuesSourceType; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; +import java.util.function.Supplier; + +import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; + +/** + * A field mapper for flat_objects. + * This mapper accepts JSON object and treat as string fields in one index. 
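+ * Internally the object is flattened into three keyword-like fields: the field itself indexes every key name (as <field>.<key>), <field>._value indexes every leaf value, and <field>._valueAndPath indexes "path=value" pairs (see JsonToStringXContentParser and parseValueAddFields).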
+ * @opensearch.internal + */ +public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { + + public static final String CONTENT_TYPE = "flat_object"; + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; + private static final String VALUE_SUFFIX = "._value"; + private static final String DOT_SYMBOL = "."; + private static final String EQUAL_SYMBOL = "="; + + /** + * In flat_object field mapper, field type is similar to keyword field type + * Cannot be tokenized, can OmitNorms, and can setIndexOption. + * @opensearch.internal + */ + public static class Defaults { + public static final FieldType FIELD_TYPE = new FieldType(); + + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + + } + + @Override + public MappedFieldType keyedFieldType(String key) { + return new FlatObjectFieldType(this.name() + DOT_SYMBOL + key); + } + + /** + * FlatObjectFieldType is the parent field type. + */ + public static class FlatObjectField extends Field { + + public FlatObjectField(String field, BytesRef term, FieldType ft) { + super(field, term, ft); + } + + } + + /** + * The builder for the flat_object field mapper using default parameters + * @opensearch.internal + */ + public static class Builder extends FieldMapper.Builder { + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE); + builder = this; + } + + private FlatObjectFieldType buildFlatObjectFieldType(BuilderContext context, FieldType fieldType) { + return new FlatObjectFieldType(buildFullName(context), fieldType); + } + + /** + * ValueFieldMapper is the subfield type for values in the Json. + * use a {@link KeywordFieldMapper.KeywordField} + */ + private ValueFieldMapper buildValueFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { + String fullName = buildFullName(context); + FieldType vft = new FieldType(fieldType); + KeywordFieldMapper.KeywordFieldType valueFieldType = new KeywordFieldMapper.KeywordFieldType(fullName + VALUE_SUFFIX, vft); + + fft.setValueFieldType(valueFieldType); + return new ValueFieldMapper(vft, valueFieldType); + } + + /** + * ValueAndPathFieldMapper is the subfield type for path=value format in the Json. 
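+ * (e.g. {"author": {"given": "Mike"}} under a field named "catalog" is indexed here as the single term "catalog.author.given=Mike"),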
+ * also use a {@link KeywordFieldMapper.KeywordField} + */ + private ValueAndPathFieldMapper buildValueAndPathFieldMapper(BuilderContext context, FieldType fieldType, FlatObjectFieldType fft) { + String fullName = buildFullName(context); + FieldType vft = new FieldType(fieldType); + KeywordFieldMapper.KeywordFieldType ValueAndPathFieldType = new KeywordFieldMapper.KeywordFieldType( + fullName + VALUE_AND_PATH_SUFFIX, + vft + ); + fft.setValueAndPathFieldType(ValueAndPathFieldType); + return new ValueAndPathFieldMapper(vft, ValueAndPathFieldType); + } + + @Override + public FlatObjectFieldMapper build(BuilderContext context) { + FieldType fieldtype = new FieldType(Defaults.FIELD_TYPE); + FlatObjectFieldType fft = buildFlatObjectFieldType(context, fieldtype); + return new FlatObjectFieldMapper( + name, + Defaults.FIELD_TYPE, + fft, + buildValueFieldMapper(context, fieldtype, fft), + buildValueAndPathFieldMapper(context, fieldtype, fft), + CopyTo.empty(), + this + ); + } + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + + /** + * Creates a new TypeParser for flatObjectFieldMapper that does not use ParameterizedFieldMapper + */ + public static class TypeParser implements Mapper.TypeParser { + private final BiFunction builderFunction; + + public TypeParser(BiFunction builderFunction) { + this.builderFunction = builderFunction; + } + + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + Builder builder = builderFunction.apply(name, parserContext); + return builder; + } + } + + /** + * flat_object fields type contains its own fieldType, one valueFieldType and one valueAndPathFieldType + * @opensearch.internal + */ + public static final class FlatObjectFieldType extends StringFieldType { + + private final int ignoreAbove; + private final String nullValue; + + private KeywordFieldMapper.KeywordFieldType valueFieldType; + + private KeywordFieldMapper.KeywordFieldType valueAndPathFieldType; + + public FlatObjectFieldType(String name, boolean isSearchable, boolean hasDocValues, Map meta) { + super(name, isSearchable, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, meta); + setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + public FlatObjectFieldType(String name) { + this(name, true, true, Collections.emptyMap()); + } + + public FlatObjectFieldType(String name, FieldType fieldType) { + super( + name, + fieldType.indexOptions() != IndexOptions.NONE, + false, + true, + new TextSearchInfo(fieldType, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + Collections.emptyMap() + ); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + public FlatObjectFieldType(String name, NamedAnalyzer analyzer) { + super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap()); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + } + + void setValueFieldType(KeywordFieldMapper.KeywordFieldType valueFieldType) { + this.valueFieldType = valueFieldType; + } + + void setValueAndPathFieldType(KeywordFieldMapper.KeywordFieldType ValueAndPathFieldType) { + this.valueAndPathFieldType = ValueAndPathFieldType; + } + + public KeywordFieldMapper.KeywordFieldType getValueFieldType() { + return this.valueFieldType; + } + + public KeywordFieldMapper.KeywordFieldType getValueAndPathFieldType() { + return this.valueAndPathFieldType; + } + + @Override + public 
String typeName() { + return CONTENT_TYPE; + } + + NamedAnalyzer normalizer() { + return indexAnalyzer(); + } + + /** + * + * Fielddata is an in-memory data structure that is used for aggregations, sorting, and scripting. + * @param fullyQualifiedIndexName the name of the index this field-data is build for + * @param searchLookup a {@link SearchLookup} supplier to allow for accessing other fields values in the context of runtime fields + * @return IndexFieldData.Builder + */ + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { + failIfNoDocValues(); + return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + } + + return new SourceValueFetcher(name(), context, nullValue) { + @Override + protected String parseSourceValue(Object value) { + String flatObjectKeywordValue = value.toString(); + + if (flatObjectKeywordValue.length() > ignoreAbove) { + return null; + } + + NamedAnalyzer normalizer = normalizer(); + if (normalizer == null) { + return flatObjectKeywordValue; + } + + try { + return normalizeValue(normalizer, name(), flatObjectKeywordValue); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + }; + } + + @Override + public Object valueForDisplay(Object value) { + if (value == null) { + return null; + } + // flat_objects are internally stored as utf8 bytes + BytesRef binaryValue = (BytesRef) value; + return binaryValue.utf8ToString(); + } + + @Override + protected BytesRef indexedValueForSearch(Object value) { + if (getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) { + // flat_object analyzer with the default attribute source which encodes terms using UTF8 + // in that case we skip normalization, which may be slow if there many terms need to + // parse (eg. 
large terms query) since Analyzer.normalize involves things like creating + // attributes through reflection + // This if statement will be used whenever a normalizer is NOT configured + return super.indexedValueForSearch(value); + } + + if (value == null) { + return null; + } + value = inputToString(value); + return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); + } + + /** + * redirect queries with rewrite value to rewriteSearchValue and directSubFieldName + */ + @Override + public Query termQuery(Object value, @Nullable QueryShardContext context) { + + String searchValueString = inputToString(value); + String directSubFieldName = directSubfield(); + String rewriteSearchValue = rewriteValue(searchValueString); + + failIfNotIndexed(); + Query query; + query = new TermQuery(new Term(directSubFieldName, indexedValueForSearch(rewriteSearchValue))); + if (boost() != 1f) { + query = new BoostQuery(query, boost()); + } + return query; + } + + @Override + public Query termsQuery(List values, QueryShardContext context) { + failIfNotIndexed(); + String directedSearchFieldName = directSubfield(); + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + String rewriteValues = rewriteValue(inputToString(values.get(i))); + + bytesRefs[i] = indexedValueForSearch(new BytesRef(rewriteValues)); + + } + + return new TermInSetQuery(directedSearchFieldName, bytesRefs); + } + + /** + * To direct search fields, if a dot path was used in search query, + * then direct to flatObjectFieldName._valueAndPath subfield, + * else, direct to flatObjectFieldName._value subfield. + * @return directedSubFieldName + */ + public String directSubfield() { + if (name().contains(DOT_SYMBOL)) { + String[] dotPathList = name().split("\\."); + return dotPathList[0] + VALUE_AND_PATH_SUFFIX; + } else { + return this.valueFieldType.name(); + } + } + + /** + * If the search key is assigned with value, + * the dot path was used in search query, then + * rewrite the searchValueString as the format "dotpath=value", + * @return rewriteSearchValue + */ + public String rewriteValue(String searchValueString) { + if (!name().contains(DOT_SYMBOL)) { + return searchValueString; + } else { + String rewriteSearchValue = new StringBuilder().append(name()).append(EQUAL_SYMBOL).append(searchValueString).toString(); + return rewriteSearchValue; + } + + } + + private String inputToString(Object inputValue) { + if (inputValue instanceof Integer) { + String inputToString = Integer.toString((Integer) inputValue); + return inputToString; + } else if (inputValue instanceof Float) { + String inputToString = Float.toString((Float) inputValue); + return inputToString; + } else if (inputValue instanceof Boolean) { + String inputToString = Boolean.toString((Boolean) inputValue); + return inputToString; + } else if (inputValue instanceof Short) { + String inputToString = Short.toString((Short) inputValue); + return inputToString; + } else if (inputValue instanceof Long) { + String inputToString = Long.toString((Long) inputValue); + return inputToString; + } else if (inputValue instanceof Double) { + String inputToString = Double.toString((Double) inputValue); + return inputToString; + } else if (inputValue instanceof BytesRef) { + String inputToString = (((BytesRef) inputValue).utf8ToString()); + return inputToString; + } else if (inputValue instanceof String) { + String inputToString = (String) inputValue; + return inputToString; + } else if (inputValue instanceof Version) { + String 
inputToString = inputValue.toString(); + return inputToString; + } else { + // default to cast toString + return inputValue.toString(); + } + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) { + String directSubfield = directSubfield(); + String rewriteValue = rewriteValue(value); + + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[prefix] queries cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false. For optimised prefix queries on text " + + "fields please enable [index_prefixes]." + ); + } + failIfNotIndexed(); + if (method == null) { + method = MultiTermQuery.CONSTANT_SCORE_REWRITE; + } + if (caseInsensitive) { + return AutomatonQueries.caseInsensitivePrefixQuery((new Term(directSubfield, indexedValueForSearch(rewriteValue))), method); + } + return new PrefixQuery(new Term(directSubfield, indexedValueForSearch(rewriteValue)), method); + } + + @Override + public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { + String directSubfield = directSubfield(); + String rewriteUpperTerm = rewriteValue(inputToString(upperTerm)); + String rewriteLowerTerm = rewriteValue(inputToString(lowerTerm)); + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[range] queries on [text] or [keyword] fields cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false." + ); + } + failIfNotIndexed(); + return new TermRangeQuery( + directSubfield, + lowerTerm == null ? null : indexedValueForSearch(rewriteLowerTerm), + upperTerm == null ? null : indexedValueForSearch(rewriteUpperTerm), + includeLower, + includeUpper + ); + } + + /** + * if there is dot path. query the field name in flatObject parent field. 
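+ * (e.g. exists on "catalog.catalogId" becomes a term query for "catalog.catalogId" on the parent "catalog" field),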
+ * else query in _field_names system field + */ + @Override + public Query existsQuery(QueryShardContext context) { + String searchKey; + String searchField; + if (name().contains(DOT_SYMBOL)) { + searchKey = name().split("\\.")[0]; + searchField = name(); + } else { + searchKey = FieldNamesFieldMapper.NAME; + searchField = name(); + } + return new TermQuery(new Term(searchKey, indexedValueForSearch(searchField))); + } + + @Override + public Query wildcardQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitve, + QueryShardContext context + ) { + // flat_object field types are always normalized, so ignore case sensitivity and force normalize the wildcard + // query text + throw new QueryShardException( + context, + "Can only use wildcard queries on keyword and text fields - not on [" + name() + "] which is of type [" + typeName() + "]" + ); + + } + + } + + private final ValueFieldMapper valueFieldMapper; + private final ValueAndPathFieldMapper valueAndPathFieldMapper; + + FlatObjectFieldMapper( + String simpleName, + FieldType fieldType, + FlatObjectFieldType mappedFieldType, + ValueFieldMapper valueFieldMapper, + ValueAndPathFieldMapper valueAndPathFieldMapper, + CopyTo copyTo, + Builder builder + ) { + super(simpleName, fieldType, mappedFieldType, copyTo); + assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; + this.fieldType = fieldType; + this.valueFieldMapper = valueFieldMapper; + this.valueAndPathFieldMapper = valueAndPathFieldMapper; + this.mappedFieldType = mappedFieldType; + } + + @Override + protected FlatObjectFieldMapper clone() { + return (FlatObjectFieldMapper) super.clone(); + } + + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + + @Override + public FlatObjectFieldType fieldType() { + return (FlatObjectFieldType) super.fieldType(); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + String fieldName = null; + + if (context.externalValueSet()) { + String value = context.externalValue().toString(); + parseValueAddFields(context, value, fieldType().name()); + } else { + JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + context, + fieldType().name() + ); + /** + * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + * It reads the JSON object and parsed to a list of string + */ + XContentParser parser = JsonToStringParser.parseObject(); + + XContentParser.Token currentToken; + while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + switch (currentToken) { + case FIELD_NAME: + fieldName = parser.currentName(); + break; + case VALUE_STRING: + String value = parser.textOrNull(); + parseValueAddFields(context, value, fieldName); + break; + } + + } + + } + + } + + @Override + public Iterator iterator() { + List subIterators = new ArrayList<>(); + if (valueFieldMapper != null) { + subIterators.add(valueFieldMapper); + } + if (valueAndPathFieldMapper != null) { + subIterators.add(valueAndPathFieldMapper); + } + if (subIterators.size() == 0) { + return super.iterator(); + } + @SuppressWarnings("unchecked") + Iterator concat = Iterators.concat(super.iterator(), subIterators.iterator()); + return concat; + } + + /** + * parseValueAddFields method will store data to Lucene. 
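+ * (one Lucene term per key, per leaf value and per "path=value" pair parsed from the JSON object):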
+ * the JsonToStringXContentParser returns XContentParser with 3 string fields + * fieldName, fieldName._value, fieldName._valueAndPath. + * parseValueAddFields recognized string by the stringfield name, + * fieldName will be store through the parent FlatObjectFieldMapper,which contains all the keys + * fieldName._value will be store through the valueFieldMapper, which contains the values of the Json Object + * fieldName._valueAndPath will be store through the valueAndPathFieldMapper, which contains the "path=values" format + */ + private void parseValueAddFields(ParseContext context, String value, String fieldName) throws IOException { + + NamedAnalyzer normalizer = fieldType().normalizer(); + if (normalizer != null) { + value = normalizeValue(normalizer, name(), value); + } + + String[] valueTypeList = fieldName.split("\\._"); + String valueType = "._" + valueTypeList[valueTypeList.length - 1]; + + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + // convert to utf8 only once before feeding postings/dv/stored fields + + final BytesRef binaryValue = new BytesRef(fieldType().name() + DOT_SYMBOL + value); + Field field = new FlatObjectField(fieldType().name(), binaryValue, fieldType); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + if (fieldName.equals(fieldType().name())) { + context.doc().add(field); + } + if (valueType.equals(VALUE_SUFFIX)) { + if (valueFieldMapper != null) { + valueFieldMapper.addField(context, value); + } + } + if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { + if (valueAndPathFieldMapper != null) { + valueAndPathFieldMapper.addField(context, value); + } + } + + if (fieldType().hasDocValues()) { + if (context.doc().getField(fieldType().name()) == null || !context.doc().getFields(fieldType().name()).equals(field)) { + if (fieldName.equals(fieldType().name())) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } + if (valueType.equals(VALUE_SUFFIX)) { + if (valueFieldMapper != null) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_SUFFIX, binaryValue)); + } + } + if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { + if (valueAndPathFieldMapper != null) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_AND_PATH_SUFFIX, binaryValue)); + } + } + + } + } + + } + + } + + private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException { + String normalizerErrorMessage = "The normalization token stream is " + + "expected to produce exactly 1 token, but got 0 for analyzer " + + normalizer + + " and input \"" + + value + + "\""; + try (TokenStream ts = normalizer.tokenStream(field, value)) { + final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + if (ts.incrementToken() == false) { + throw new IllegalStateException(normalizerErrorMessage); + } + final String newValue = termAtt.toString(); + if (ts.incrementToken()) { + throw new IllegalStateException(normalizerErrorMessage); + } + ts.end(); + return newValue; + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + private static final class ValueAndPathFieldMapper extends FieldMapper { + + protected ValueAndPathFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { + super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); + } + + void addField(ParseContext context, String 
value) { + final BytesRef binaryValue = new BytesRef(value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new KeywordFieldMapper.KeywordField(fieldType().name(), binaryValue, fieldType); + + context.doc().add(field); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + } + } + + @Override + protected void parseCreateField(ParseContext context) { + throw new UnsupportedOperationException(); + } + + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + + @Override + protected String contentType() { + return "valueAndPath"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + + } + + private static final class ValueFieldMapper extends FieldMapper { + + protected ValueFieldMapper(FieldType fieldType, KeywordFieldMapper.KeywordFieldType mappedFieldType) { + super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); + } + + void addField(ParseContext context, String value) { + final BytesRef binaryValue = new BytesRef(value); + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new KeywordFieldMapper.KeywordField(fieldType().name(), binaryValue, fieldType); + context.doc().add(field); + + if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(context); + } + } + } + + @Override + protected void parseCreateField(ParseContext context) { + throw new UnsupportedOperationException(); + } + + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + + } + + @Override + protected String contentType() { + return "value"; + } + + @Override + public String toString() { + return fieldType().toString(); + } + } + +} diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 5310e1b1e8397..696536cb85c9d 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -51,6 +51,7 @@ import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.FieldAliasMapper; import org.opensearch.index.mapper.FieldNamesFieldMapper; +import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.IgnoredFieldMapper; @@ -162,6 +163,7 @@ public static Map getMappers(List mappe mappers.put(CompletionFieldMapper.CONTENT_TYPE, CompletionFieldMapper.PARSER); mappers.put(FieldAliasMapper.CONTENT_TYPE, new FieldAliasMapper.TypeParser()); mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); + mappers.put(FlatObjectFieldMapper.CONTENT_TYPE, FlatObjectFieldMapper.PARSER); for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { diff --git a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java index 69be53dc1016b..10ccc99f44603 100644 --- a/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java +++ b/server/src/test/java/org/opensearch/index/fielddata/AbstractFieldDataTestCase.java @@ -51,6 +51,7 @@ import 
org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; @@ -142,6 +143,8 @@ public > IFD getForField(String type, String field .fieldType(); } else if (type.equals("geo_point")) { fieldType = new GeoPointFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); + } else if (type.equals("flat_object")) { + fieldType = new FlatObjectFieldMapper.Builder(fieldName).docValues(docValues).build(context).fieldType(); } else if (type.equals("binary")) { fieldType = new BinaryFieldMapper.Builder(fieldName, docValues).build(context).fieldType(); } else { diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java new file mode 100644 index 0000000000000..54393b10a3c5d --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldDataTests.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.mapper; + +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.common.Strings; +import org.opensearch.common.bytes.BytesReference; +import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.fielddata.AbstractFieldDataTestCase; +import org.opensearch.index.fielddata.IndexFieldData; + +import java.util.List; + +public class FlatObjectFieldDataTests extends AbstractFieldDataTestCase { + private String FIELD_TYPE = "flat_object"; + + @Override + protected boolean hasDocValues() { + return true; + } + + public void testDocValue() throws Exception { + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("test") + .startObject("properties") + .startObject("field") + .field("type", FIELD_TYPE) + .endObject() + .endObject() + .endObject() + .endObject() + ); + final DocumentMapper mapper = mapperService.documentMapperParser().parse("test", new CompressedXContent(mapping)); + + XContentBuilder json = XContentFactory.jsonBuilder().startObject().startObject("field").field("foo", "bar").endObject().endObject(); + ParsedDocument d = mapper.parse(new SourceToParse("test", "1", BytesReference.bytes(json), XContentType.JSON)); + writer.addDocument(d.rootDoc()); + writer.commit(); + + IndexFieldData fieldData = getForField("field"); + List readers = refreshReader(); + assertEquals(1, readers.size()); + + IndexFieldData valueFieldData = getForField("field._value"); + List valueReaders = refreshReader(); + assertEquals(1, valueReaders.size()); + } + + @Override + protected String getFieldDataType() { + return FIELD_TYPE; + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java new file mode 100644 index 0000000000000..309b150f11748 --- /dev/null +++ 
+++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java
@@ -0,0 +1,143 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.IndexableFieldType;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.Strings;
+import org.opensearch.core.xcontent.ToXContent;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.common.xcontent.XContentFactory;
+import org.opensearch.common.xcontent.json.JsonXContent;
+import org.opensearch.index.query.QueryShardContext;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.hamcrest.core.StringContains.containsString;
+
+public class FlatObjectFieldMapperTests extends MapperTestCase {
+    private static final String FIELD_TYPE = "flat_object";
+    private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath";
+    private static final String VALUE_SUFFIX = "._value";
+
+    protected boolean supportsMeta() {
+        return false;
+    }
+
+    protected boolean supportsOrIgnoresBoost() {
+        return false;
+    }
+
+    public void testMapperServiceHasParser() throws IOException {
+        MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
+        Mapper.TypeParser parser = mapperService.mapperRegistry.getMapperParsers().get(FIELD_TYPE);
+        assertNotNull(parser);
+        assertTrue(parser instanceof FlatObjectFieldMapper.TypeParser);
+    }
+
+    protected void assertExistsQuery(MapperService mapperService) throws IOException {
+        ParseContext.Document fields = mapperService.documentMapper().parse(source(this::writeField)).rootDoc();
+        QueryShardContext queryShardContext = createQueryShardContext(mapperService);
+        MappedFieldType fieldType = mapperService.fieldType("field");
+        Query query = fieldType.existsQuery(queryShardContext);
+        assertExistsQuery(fieldType, query, fields);

+    }
+
+    protected void assertExistsQuery(MappedFieldType fieldType, Query query, ParseContext.Document fields) {
+        // we always perform a term query against _field_names, even when the field
+        // is not added to _field_names because it is neither indexed nor stored
+        assertThat(query, instanceOf(TermQuery.class));
+        TermQuery termQuery = (TermQuery) query;
+        assertEquals(FieldNamesFieldMapper.NAME, termQuery.getTerm().field());
+        assertEquals("field", termQuery.getTerm().text());
+        if (fieldType.isSearchable() || fieldType.isStored()) {
+            assertNotNull(fields.getField(FieldNamesFieldMapper.NAME));
+        } else {
+            assertNoFieldNamesField(fields);
+        }
+    }
+
+    public void minimalMapping(XContentBuilder b) throws IOException {
+        b.field("type", FIELD_TYPE);
+    }
+
+    /**
+     * Writes a sample value for the field to the provided {@link XContentBuilder}.
+     * @param builder builder
+     */
+    protected void writeFieldValue(XContentBuilder builder) throws IOException {
+        builder.startObject();
+        builder.field("foo", "bar");
+        builder.endObject();
+    }
+
+    public void testMinimalToMaximal() throws IOException {
+        XContentBuilder orig = JsonXContent.contentBuilder().startObject();
+        createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS);
+        orig.endObject();
+        XContentBuilder parsedFromOrig = JsonXContent.contentBuilder().startObject();
+        createMapperService(orig).documentMapper().mapping().toXContent(parsedFromOrig, ToXContent.EMPTY_PARAMS);
+        parsedFromOrig.endObject();
+        assertEquals(Strings.toString(orig), Strings.toString(parsedFromOrig));
+        assertParseMaximalWarnings();
+    }
+
+    public void testDefaults() throws Exception {
+        XContentBuilder mapping = fieldMapping(this::minimalMapping);
+        DocumentMapper mapper = createDocumentMapper(mapping);
+        assertEquals(Strings.toString(mapping), mapper.mappingSource().toString());
+
+        String json = Strings.toString(
+            XContentFactory.jsonBuilder().startObject().startObject("field").field("foo", "bar").endObject().endObject()
+        );
+
+        ParsedDocument doc = mapper.parse(source(json));
+        IndexableField[] fields = doc.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+        assertEquals(new BytesRef("field.foo"), fields[0].binaryValue());
+
+        IndexableFieldType fieldType = fields[0].fieldType();
+        assertFalse(fieldType.tokenized());
+        assertFalse(fieldType.stored());
+        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
+        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
+
+        // Test internal substring fields as well
+        IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX);
+        assertEquals(2, fieldValues.length);
+        assertTrue(fieldValues[0] instanceof KeywordFieldMapper.KeywordField);
+        assertEquals(new BytesRef("bar"), fieldValues[0].binaryValue());
+
+        IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX);
+        assertEquals(2, fieldValueAndPaths.length);
+        assertTrue(fieldValueAndPaths[0] instanceof KeywordFieldMapper.KeywordField);
+        assertEquals(new BytesRef("field.foo=bar"), fieldValueAndPaths[0].binaryValue());
+    }
+
+    public void testNullValue() throws IOException {
+        DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
+        MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.nullField("field"))));
+        assertThat(e.getMessage(), containsString("object mapping for [_doc] tried to parse field [field] as object"));

+    }
+
+    @Override
+    protected void registerParameters(ParameterChecker checker) throws IOException {
+        // In the future we will want to make sure parameter updates are covered.
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java
index ce5c361ffcf69..e9b2d40fd4ede 100644
--- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java
+++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java
@@ -50,6 +50,7 @@
 import org.opensearch.index.analysis.AnalyzerScope;
 import org.opensearch.index.analysis.NamedAnalyzer;
 import org.opensearch.index.mapper.BinaryFieldMapper;
+import org.opensearch.index.mapper.FlatObjectFieldMapper;
 import org.opensearch.index.mapper.GeoPointFieldMapper;
 import org.opensearch.index.mapper.MappedFieldType;
 import org.opensearch.index.mapper.TextFieldMapper;
@@ -102,7 +103,8 @@ protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
     protected List<String> unsupportedMappedFieldTypes() {
         return Arrays.asList(
             BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers
-            GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries
+            GeoPointFieldMapper.CONTENT_TYPE, // geopoint fields cannot use term queries
+            FlatObjectFieldMapper.CONTENT_TYPE // flat_object fields do not support aggregations
         );
     }