From 281e2b4f6a8806361db357ff207be2053a8597d8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 7 Jul 2023 03:03:41 +0000 Subject: [PATCH] Enable Partial Flat Object (#7997) Before this PR, flat_object can only be defined at the root field of JSON object in 2.7 and 2.8, because the json object is flatten so it lost the structure of a tree. Now, this PR enables json object to be partially mapped as flat_object field type while other fields in the JSON can be other field type. --------- Signed-off-by: Mingshi Liu (cherry picked from commit d333cd388e304459fd46999548571e7b01f576ad) Signed-off-by: github-actions[bot] --- CHANGELOG.md | 1 + .../test/index/100_partial_flat_object.yml | 606 +++++++++++++++++ .../index/105_partial_flat_object_nested.yml | 636 ++++++++++++++++++ .../index/mapper/FlatObjectFieldMapper.java | 63 +- .../mapper/FlatObjectFieldTypeTests.java | 134 ++++ 5 files changed, 1424 insertions(+), 16 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/index/100_partial_flat_object.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/index/105_partial_flat_object_nested.yml create mode 100644 server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index c254ee0e1349b..2a1c942b2fe62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Extensions] Support extension additional settings with extension REST initialization ([#8414](https://github.com/opensearch-project/OpenSearch/pull/8414)) - Adds mock implementation for TelemetryPlugin ([#7545](https://github.com/opensearch-project/OpenSearch/issues/7545)) - Create concept of persistent ThreadContext headers that are unstashable ([#8291]()https://github.com/opensearch-project/OpenSearch/pull/8291) +- Enable Partial Flat Object 
([#7997](https://github.com/opensearch-project/OpenSearch/pull/7997)) ### Dependencies - Bump `com.azure:azure-storage-common` from 12.21.0 to 12.21.1 (#7566, #7814) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/100_partial_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/100_partial_flat_object.yml new file mode 100644 index 0000000000000..91e4127da9c32 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/100_partial_flat_object.yml @@ -0,0 +1,606 @@ +--- +# The test setup includes: +# - Create flat_object mapping for test_partial_flat_object index +# - Index three example documents +# - Refresh the index so it is ready for search tests + +setup: + - do: + indices.create: + index: test_partial_flat_object + body: + mappings: + properties: + issue: + properties: + number: + type: "integer" + labels: + type: "flat_object" + - do: + index: + index: test_partial_flat_object + id: 1 + body: { + "issue": { + "number": 123, + "labels": { + "version": "2.2", + "backport": [ + "2.0", + "1.9" + ], + "category": { + "type": "API", + "level": "bug" + }, + "createdDate": "2023-01-01", + "comment": [ [ "Doe","Shipped" ],[ "John","Approved" ] ], + "views": 288, + "priority": 5.00 + } + } + } + + - do: + index: + index: test_partial_flat_object + id: 2 + body: { + "issue": { + "number": 456, + "labels": { + "author": "Liu", + "version": "2.1", + "backport": [ + "2.0", + "1.3" + ], + "category": { + "type": "API", + "level": "enhancement" + }, + "createdDate": "2023-02-01", + "comment": [ [ "Mike","LGTM" ],[ "John","Approved" ] ], + "views": 3333, + "priority": 1.50 + } + } + } + + - do: + index: + index: test_partial_flat_object + id: 3 + body: { + "issue": { + "number": 999, + "labels": [ { + "version": "1.1", + "backport": [ + "1.0", + "0.9" + ], + "category": { + "type": "Module", + "level": "feature" + } + } ] + } + } + + - do: + indices.refresh: + index: test_partial_flat_object +--- +# Delete Index 
when connection is teardown +teardown: + - do: + indices.delete: + index: test_partial_flat_object + + +--- +# Verify that mappings under the issue.labels flat_object field did not expand +# and no dynamic fields were created. +"Mappings": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.get_mapping: + index: test_partial_flat_object + - is_true: test_partial_flat_object.mappings + - match: { test_partial_flat_object.mappings.properties.issue.properties.number.type: integer } + - match: { test_partial_flat_object.mappings.properties.issue.properties.labels.type: flat_object } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { test_partial_flat_object.mappings.properties.issue.properties: 2 } + - length: { test_partial_flat_object.mappings.properties.issue.properties.labels: 1 } + + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + + # Verify Document Count + - do: + search: + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 3 } + + # Match Query with exact dot path. + - do: + search: + body: { + _source: true, + query: { + match: { "issue.labels.version": "2.1" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.version: "2.1" } + + # Match Query without exact dot path. + - do: + search: + body: { + _source: true, + query: { + match: { issue.labels: "2.1" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.version: "2.1" } + + # Multi Match Query with exact dot path. 
+ - do: + search: + body: { + _source: true, + query: { + multi_match: { + "query": "2.0", + "fields": [ "issue.labels.version", "issue.labels.backport" ] + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.backport: [ "2.0", "1.9" ] } + - match: { hits.hits.1._source.issue.labels.backport: [ "2.0", "1.3" ] } + + # Term Query1 with exact dot path for date + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels.createdDate: "2023-01-01" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.createdDate: "2023-01-01" } + + # Term Query1 without exact dot path for date + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels: "2023-01-01" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.createdDate: "2023-01-01" } + + + # Term Query2 with dot path for string + - do: + search: + body: { + _source: true, + query: { + term: { "issue.labels.category.type": "API" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.category.type: "API" } + - match: { hits.hits.1._source.issue.labels.category.type: "API" } + + # Term Query2 without exact dot path. 
+ - do: + search: + body: { + _source: true, + query: { + term: { issue.labels: "API" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.category.type: "API" } + - match: { hits.hits.1._source.issue.labels.category.type: "API" } + + # Term Query3 with dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels.backport: "1.9" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.backport: [ "2.0", "1.9" ] } + + # Term Query3 without dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels: "1.9" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.backport: [ "2.0", "1.9" ] } + + # Term Query4 with dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels.comment: "LGTM" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Term Query4 without dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels: "LGTM" } + } + } + + # Term Query5 with dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels.category.type: "Module" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.0.category.type: "Module" } + + # Term Query5 without dot path for array + - do: + search: + body: { + _source: true, + query: { + term: { issue.labels: "Module" } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.0.category.type: "Module" } + + # Terms Query without dot path. 
+ - do: + search: + body: { + _source: true, + query: { + terms: { issue.labels: [ "John","Mike" ] } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Terms Query with dot path. + - do: + search: + body: { + _source: true, + query: { + terms: { issue.labels.comment: [ "John","Mike" ] } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Prefix Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "prefix": { + "issue.labels.comment": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Prefix Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "prefix": { + "issue.labels": { + "value": "Mi" + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Range Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels.version": { + "gte": "2.1", + "lte": "3.0" + } + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.version: "2.2" } + - match: { hits.hits.1._source.issue.labels.version: "2.1" } + + # Range Query without dot path. 
+ - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels": { + "gte": "2.1", + "lte": "3.0" + } + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.version: "2.2" } + - match: { hits.hits.1._source.issue.labels.version: "2.1" } + + # Range Query with integer input with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels.views": { + "gte": 3000, + "lte": 4000 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.views: 3333 } + + # Range Query with integer input without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels": { + "gte": 3000, + "lte": 4000 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.views: 3333 } + + + # Range Query with double input with dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels.priority": { + "gte": 4.1234, + "lte": 5.1234 + } + } + } + } + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.priority: 5.00 } + + # Range Query with double input without dot path. + - do: + search: + body: { + _source: true, + query: { + "range": { + "issue.labels": { + "gte": 4.1234, + "lte": 5.1234 + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.priority: 5.00 } + + + # Exists Query with dot path. 
+ - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": issue.labels.priority + } + } + } + + - length: { hits.hits: 2 } + + # Exists Query with nested dot path, use the flat_object_field_name.last_key + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": issue.labels.type + } + } + } + + - length: { hits.hits: 3 } + + # Exists Query without dot path for the flat_object_field_name + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": issue.labels + } + } + } + + - length: { hits.hits: 3 } + + # Exists Query2 with dot path for one hit + - do: + search: + body: { + _source: true, + query: { + "exists": { + "field": issue.labels.author + } + } + } + + - length: { hits.hits: 1 } + + # Query_string Query without dot path. + - do: + search: + body: { + _source: true, + query: { + "query_string": { + "fields": [ "issue.labels" ], + "query": "Doe OR Mike" + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Query_string Query with dot path. + - do: + search: + body: { + _source: true, + query: { + "query_string": { + "fields": [ "issue.labels.comment" ], + "query": "Doe OR Mike" + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Simple_query_string Query without full dot path. + - do: + search: + body: { + _source: true, + query: { + "simple_query_string": { + "query": "Doe", + "fields": [ "issue.labels" ] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + + # Simple_query_string Query with dot path. 
+ - do: + search: + body: { + _source: true, + query: { + "simple_query_string": { + "query": "Doe", + "fields": [ "issue.labels.comment" ] + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/105_partial_flat_object_nested.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/105_partial_flat_object_nested.yml new file mode 100644 index 0000000000000..ce172c2773e1f --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/105_partial_flat_object_nested.yml @@ -0,0 +1,636 @@ +--- +# The test setup includes: +# - Create flat_object mapping for test_partial_flat_object_nested index +# - Index two example documents +# - Refresh the index so it is ready for search tests + +setup: + - do: + indices.create: + index: test_partial_flat_object_nested + body: + mappings: + properties: + issue: + type: "nested" + properties: + number: + type: "integer" + labels: + type: "flat_object" + - do: + index: + index: test_partial_flat_object_nested + id: 1 + body: { + "issue": [ + { + "number": 123, + "labels": { + "version": "2.2", + "backport": [ + "2.0", + "1.9" + ], + "category": { + "type": "API", + "level": "bug" + }, + "createdDate": "2023-01-01", + "comment": [ [ "Doe","Shipped" ],[ "John","Approved" ] ], + "views": 288, + "priority": 5.00 + } + } + ] + } + + - do: + index: + index: test_partial_flat_object_nested + id: 2 + body: { + "issue": [ + { + "number": 456, + "labels": { + "author": "Liu", + "version": "2.1", + "backport": [ + "2.0", + "1.3" + ], + "category": { + "type": "API", + "level": "enhancement" + }, + "createdDate": "2023-02-01", + "comment": [ [ "Mike","LGTM" ],[ "John","Approved" ] ], + "views": 3333, + "priority": 1.50 + } + } + ] + } + + - do: + indices.refresh: + index: test_partial_flat_object_nested +--- +# Delete Index when connection is teardown +teardown: + - 
do: + indices.delete: + index: test_partial_flat_object_nested + + +--- +# Verify that mappings under the issue.labels flat_object field did not expand +# and no dynamic fields were created. +"Mappings": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.get_mapping: + index: test_partial_flat_object_nested + - is_true: test_partial_flat_object_nested.mappings + - match: { test_partial_flat_object_nested.mappings.properties.issue.properties.number.type: integer } + - match: { test_partial_flat_object_nested.mappings.properties.issue.properties.labels.type: flat_object } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { test_partial_flat_object_nested.mappings.properties.issue.properties: 2 } + - length: { test_partial_flat_object_nested.mappings.properties.issue.properties.labels: 1 } + + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + + # Verify Document Count + - do: + search: + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 2 } + + # Match Query with exact dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + match: { "issue.labels.version": "2.1" } + } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.version: "2.1" } + + # Match Query without exact dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + match: { "issue.labels": "2.1" } + } + } + } + } + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.version: "2.1" } + + # Term Query1 with exact dot path for date + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels.createdDate: "2023-01-01" } + } + } } } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.createdDate: "2023-01-01" } + + # Term Query1 without exact dot path for date + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels: "2023-01-01" } + } } } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.createdDate: "2023-01-01" } + + # Term Query2 with dot path for string + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { "issue.labels.category.type": "API" } + } } } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.category.type: "API" } + - match: { hits.hits.1._source.issue.0.labels.category.type: "API" } + + # Term Query2 without exact dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels: "API" } + } } } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.category.type: "API" } + - match: { hits.hits.1._source.issue.0.labels.category.type: "API" } + + # Term Query3 with dot path for array + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels.backport: "1.9" } + } } } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.backport: [ "2.0", "1.9" ] } + + # Term Query3 without dot path for array + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels: "1.9" } + } } } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.backport: [ "2.0", "1.9" ] } + + # Term Query4 with dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels.comment: "LGTM" } + } } } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Term Query4 without dot path for nested-array + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + term: { issue.labels: "LGTM" } } } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Terms Query without dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + terms: { issue.labels: [ "John","Mike" ] } } } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Terms Query with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + terms: { issue.labels.comment: [ "John","Mike" ] } } } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Prefix Query with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "prefix": { + "issue.labels.comment": { + "value": "Mi" + } } } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Prefix Query without dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "prefix": { + "issue.labels": { + "value": "Mi" + } + } + } } } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Range Query with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels.version": { + "gte": "2.1", + "lte": "3.0" + } } } + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.version: "2.2" } + - match: { hits.hits.1._source.issue.0.labels.version: "2.1" } + + # Range Query without dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels": { + "gte": "2.1", + "lte": "3.0" + } } } + } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.version: "2.2" } + - match: { hits.hits.1._source.issue.0.labels.version: "2.1" } + + # Range Query with integer input with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels.views": { + "gte": 3000, + "lte": 4000 + } } } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.views: 3333 } + + # Range Query with integer input without dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels": { + "gte": 3000, + "lte": 4000 + } } } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.views: 3333 } + + + # Range Query with double input with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels.priority": { + "gte": 4.1234, + "lte": 5.1234 + } } } + } + } + } + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.priority: 5.00 } + + # Range Query with double input without dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "range": { + "issue.labels": { + "gte": 4.1234, + "lte": 5.1234 + } } } + } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.priority: 5.00 } + + + # Exists Query with dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "exists": { + "field": issue.labels.priority + } } } + } + } + + - length: { hits.hits: 2 } + + # Exists Query with nested dot path, use the flat_object_field_name.last_key + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "exists": { + "field": issue.labels.type + } } } + } + } + + - length: { hits.hits: 2 } + + # Exists Query without dot path for the flat_object_field_name + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "exists": { + "field": issue.labels + } } } + } + } + + - length: { hits.hits: 2 } + + # Exists Query2 with dot path for one hit + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "exists": { + "field": issue.labels.author + } } } + } + } + + - length: { hits.hits: 1 } + + # Query_string Query without dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "query_string": { + "fields": [ "issue.labels" ], + "query": "Doe OR Mike" + } } } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Query_string Query with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "query_string": { + "fields": [ "issue.labels.comment" ], + "query": "Doe OR Mike" + } } } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + - match: { hits.hits.1._source.issue.0.labels.comment: [ [ "Mike","LGTM" ],[ "John","Approved" ] ] } + + # Simple_query_string Query without full dot path. 
+ - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "simple_query_string": { + "query": "Doe", + "fields": [ "issue.labels" ] + } } } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } + + # Simple_query_string Query with dot path. + - do: + search: + body: { + _source: true, + query: { + nested: { + path: "issue", + query: { + "simple_query_string": { + "query": "Doe", + "fields": [ "issue.labels.comment" ] + } } } + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.issue.0.labels.comment: [ [ "Doe","Shipped" ],[ "John","Approved" ] ] } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index e0b37df5c1734..36e0adbbf057f 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -85,7 +85,7 @@ public static class Defaults { @Override public MappedFieldType keyedFieldType(String key) { - return new FlatObjectFieldType(this.name() + DOT_SYMBOL + key); + return new FlatObjectFieldType(this.name() + DOT_SYMBOL + key, this.name()); } /** @@ -186,6 +186,8 @@ public static final class FlatObjectFieldType extends StringFieldType { private final int ignoreAbove; private final String nullValue; + private final String mappedFieldTypeName; + private KeywordFieldMapper.KeywordFieldType valueFieldType; private KeywordFieldMapper.KeywordFieldType valueAndPathFieldType; @@ -195,10 +197,7 @@ public FlatObjectFieldType(String name, boolean isSearchable, boolean hasDocValu setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; - } - - public FlatObjectFieldType(String name) { - this(name, true, true, Collections.emptyMap()); + this.mappedFieldTypeName 
= null; } public FlatObjectFieldType(String name, FieldType fieldType) { @@ -212,12 +211,28 @@ public FlatObjectFieldType(String name, FieldType fieldType) { ); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; + this.mappedFieldTypeName = null; } public FlatObjectFieldType(String name, NamedAnalyzer analyzer) { super(name, true, false, true, new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer), Collections.emptyMap()); this.ignoreAbove = Integer.MAX_VALUE; this.nullValue = null; + this.mappedFieldTypeName = null; + } + + public FlatObjectFieldType(String name, String mappedFieldTypeName) { + super( + name, + true, + false, + true, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER), + Collections.emptyMap() + ); + this.ignoreAbove = Integer.MAX_VALUE; + this.nullValue = null; + this.mappedFieldTypeName = mappedFieldTypeName; } void setValueFieldType(KeywordFieldMapper.KeywordFieldType valueFieldType) { @@ -356,22 +371,21 @@ public Query termsQuery(List values, QueryShardContext context) { * @return directedSubFieldName */ public String directSubfield() { - if (name().contains(DOT_SYMBOL)) { - String[] dotPathList = name().split("\\."); - return dotPathList[0] + VALUE_AND_PATH_SUFFIX; + if (mappedFieldTypeName == null) { + return new StringBuilder().append(this.name()).append(VALUE_SUFFIX).toString(); } else { - return this.valueFieldType.name(); + return new StringBuilder().append(this.mappedFieldTypeName).append(VALUE_AND_PATH_SUFFIX).toString(); } } /** - * If the search key is assigned with value, - * the dot path was used in search query, then - * rewrite the searchValueString as the format "dotpath=value", + * If the search key has mappedFieldTypeName as prefix, + * then the dot path was used in search query, + * then rewrite the searchValueString as the format "dotpath=value", * @return rewriteSearchValue */ public String rewriteValue(String searchValueString) { - if 
(!name().contains(DOT_SYMBOL)) { + if (!hasMappedFieldTyeNameInQueryFieldName(name())) { return searchValueString; } else { String rewriteSearchValue = new StringBuilder().append(name()).append(EQUAL_SYMBOL).append(searchValueString).toString(); @@ -380,6 +394,23 @@ public String rewriteValue(String searchValueString) { } + private boolean hasMappedFieldTyeNameInQueryFieldName(String input) { + String prefix = this.mappedFieldTypeName; + if (prefix == null) { + return false; + } + if (!input.startsWith(prefix)) { + return false; + } + String rest = input.substring(prefix.length()); + + if (rest.isEmpty()) { + return false; + } else { + return true; + } + } + private String inputToString(Object inputValue) { if (inputValue instanceof Integer) { String inputToString = Integer.toString((Integer) inputValue); @@ -460,15 +491,15 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower } /** - * if there is dot path. query the field name in flatObject parent field. + * If the field name carries a dot path, query that field name in the flat_object parent field (mappedFieldTypeName); 
* else query in _field_names system field */ @Override public Query existsQuery(QueryShardContext context) { String searchKey; String searchField; - if (name().contains(DOT_SYMBOL)) { - searchKey = name().split("\\.")[0]; + if (hasMappedFieldTyeNameInQueryFieldName(name())) { + searchKey = this.mappedFieldTypeName; searchField = name(); } else { searchKey = FieldNamesFieldMapper.NAME; diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java new file mode 100644 index 0000000000000..9ec053dc59d10 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldTypeTests.java @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.TermQuery; +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.analysis.AnalyzerScope; +import org.opensearch.index.analysis.NamedAnalyzer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class FlatObjectFieldTypeTests extends FieldTypeTestCase { + private static MappedFieldType getFlatParentFieldType(String fieldName) { + Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); + Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath()); + MappedFieldType flatParentFieldType = new FlatObjectFieldMapper.Builder(fieldName).build(context).fieldType(); + return flatParentFieldType; + } + + public void 
testFetchSourceValue() throws IOException { + MappedFieldType mapper = getFlatParentFieldType("field"); + + Map jsonPoint = new HashMap<>(); + jsonPoint.put("type", "flat_object"); + jsonPoint.put("coordinates", Arrays.asList(42.0, 27.1)); + Map otherJsonPoint = new HashMap<>(); + otherJsonPoint.put("type", "Point"); + otherJsonPoint.put("coordinates", Arrays.asList(30.0, 50.0)); + + ArrayList jsonPointList = new ArrayList<>(); + jsonPointList.add(jsonPoint.toString()); + + ArrayList otherJsonPointList = new ArrayList<>(); + otherJsonPointList.add(otherJsonPoint.toString()); + + assertEquals(jsonPointList, fetchSourceValue(mapper, jsonPoint, null)); + assertEquals(otherJsonPointList, fetchSourceValue(mapper, otherJsonPoint, null)); + + } + + public void testDirectSubfield() { + { + MappedFieldType flatParentFieldType = getFlatParentFieldType("field"); + + // when searching for "foo" in "field", the directSubfield is field._value field + String searchFieldName = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).directSubfield(); + assertEquals("field._value", searchFieldName); + + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("bar", flatParentFieldType.name()); + // when searching for "foo" in "field.bar", the directSubfield is field._valueAndPath field + String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); + assertEquals("field._valueAndPath", searchFieldNameDocPath); + } + { + NamedAnalyzer analyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, null); + MappedFieldType ft = new FlatObjectFieldMapper.FlatObjectFieldType("field", analyzer); + assertEquals("field._value", ((FlatObjectFieldMapper.FlatObjectFieldType) ft).directSubfield()); + } + } + + public void testRewriteValue() { + MappedFieldType flatParentFieldType = getFlatParentFieldType("field"); + + // when searching for "foo" in "field", the rewrite value is "foo" + String 
searchValues = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).rewriteValue("foo"); + assertEquals("foo", searchValues); + + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", flatParentFieldType.name()); + + // when searching for "foo" in "field.bar", the rewrite value is "field.bar=foo" + String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); + String searchValuesDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).rewriteValue("foo"); + assertEquals("field.bar=foo", searchValuesDocPath); + } + + public void testTermQuery() { + + MappedFieldType flatParentFieldType = getFlatParentFieldType("field"); + + // when searching for "foo" in "field", the term query is directed to search "foo" in field._value field + String searchFieldName = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).directSubfield(); + String searchValues = ((FlatObjectFieldMapper.FlatObjectFieldType) flatParentFieldType).rewriteValue("foo"); + assertEquals("foo", searchValues); + assertEquals(new TermQuery(new Term(searchFieldName, searchValues)), flatParentFieldType.termQuery(searchValues, null)); + + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", flatParentFieldType.name()); + + // when searching for "foo" in "field.bar", the term query is directed to search in field._valueAndPath field + String searchFieldNameDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).directSubfield(); + String searchValuesDocPath = ((FlatObjectFieldMapper.FlatObjectFieldType) dynamicMappedFieldType).rewriteValue("foo"); + assertEquals("field.bar=foo", searchValuesDocPath); + assertEquals(new TermQuery(new Term(searchFieldNameDocPath, searchValuesDocPath)), dynamicMappedFieldType.termQuery("foo", null)); + + MappedFieldType unsearchable = new 
FlatObjectFieldMapper.FlatObjectFieldType("field", false, true, Collections.emptyMap()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + } + + public void testExistsQuery() { + { + MappedFieldType ft = getFlatParentFieldType("field"); + // when checking on the flat_object field name "field", check if exist in the field mapper names + assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.NAME, "field")), ft.existsQuery(null)); + + // when checking if a subfield within the flat_object, for example, "field.bar", use term query in the flat_object field + MappedFieldType dynamicMappedFieldType = new FlatObjectFieldMapper.FlatObjectFieldType("field.bar", ft.name()); + assertEquals(new TermQuery(new Term("field", "field.bar")), dynamicMappedFieldType.existsQuery(null)); + + } + { + FlatObjectFieldMapper.FlatObjectFieldType ft = new FlatObjectFieldMapper.FlatObjectFieldType( + "field", + true, + false, + Collections.emptyMap() + ); + assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.NAME, "field")), ft.existsQuery(null)); + } + } +}