diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/test_flatten_data.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/test_flatten_data.json new file mode 100644 index 00000000000000..d06811006281b2 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/test_flatten_data.json @@ -0,0 +1,26 @@ +{ + "chatFiles": null, + "knowledgeFiles": null, + "extra": { + "action": "中文", + "action_id": "123", + "start_time": "2025/10/30 10:35:25", + "ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/5", + "mip": "12345", + "design_id": "67890", + "design_type": "A4", + "consume_time": 24, + "inputs": { + "prompt": "", + "image_url": "https://iamge/20251030022847441114matting-img_00001_.png", + "batch_size": 3 + }, + "outputs": [ + {"text": "text1"}, + {"text": "text2"}, + {"text": "text3"} + ] + }, + "docId": "b71db1f7", + "recordType": "R1" +} diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh index 7344dc09a27be4..d45e4b5fb4a7b4 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh @@ -132,6 +132,12 @@ curl "http://${ES_7_HOST}:9200/test2_20220809" -H "Content-Type:application/json curl "http://${ES_7_HOST}:9200/test3_20231005" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es7_test3.json' # create index test_object for object type testing (empty table) curl "http://${ES_7_HOST}:9200/test_object" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es7_test_object.json' + +# create index test_flatten to test flatten type +curl "http://${ES_7_HOST}:9200/test_flatten" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es7_test_flatten.json' +# put data for test_flatten +curl 
"http://${ES_7_HOST}:9200/test_flatten/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/test_flatten_data.json' + # put data for tese1 curl "http://${ES_7_HOST}:9200/test1/_doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' curl "http://${ES_7_HOST}:9200/test1/_doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test_flatten.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test_flatten.json new file mode 100644 index 00000000000000..d24c040aad1bf9 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test_flatten.json @@ -0,0 +1,25 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "chatFiles": { + "type": "keyword" + }, + "knowledgeFiles": { + "type": "keyword" + }, + "extra": { + "type": "flattened" + }, + "docId": { + "type": "keyword" + }, + "recordType": { + "type": "keyword" + } + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/EsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/EsUtil.java index a350f386ada9cb..f1e907bfab9095 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/es/EsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/es/EsUtil.java @@ -303,6 +303,7 @@ private static Column parseEsField(String fieldName, ObjectNode fieldValue, List // When ES table is empty, object fields still have explicit "type": "object" in mapping case "object": case "nested": + case "flattened": type = Type.JSONB; break; default: diff --git a/regression-test/data/external_table_p0/es/test_es_flatten_type.out b/regression-test/data/external_table_p0/es/test_es_flatten_type.out new file mode 100644 index 00000000000000..8f2ac5d2a31fc7 --- /dev/null +++ 
b/regression-test/data/external_table_p0/es/test_es_flatten_type.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +\N b71db1f7 {"outputs":[{"text":"text1"},{"text":"text2"},{"text":"text3"}],"start_time":"2025/10/30 10:35:25","consume_time":24,"action_id":"123","mip":"12345","design_id":"67890","inputs":{"batch_size":3,"image_url":"https://iamge/20251030022847441114matting-img_00001_.png","prompt":""},"action":"中文","design_type":"A4","ua":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/5"} \N R1 + +-- !sql2 -- +\N b71db1f7 ["","123","12345","2025/10/30 10:35:25","24","3","67890","A4","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/5","https://iamge/20251030022847441114matting-img_00001_.png","text1","text2","text3","中文"] \N R1 + +-- !sql3 -- +{"outputs":[{"text":"text1"},{"text":"text2"},{"text":"text3"}],"start_time":"2025/10/30 10:35:25","consume_time":24,"action_id":"123","mip":"12345","design_id":"67890","inputs":{"batch_size":3,"image_url":"https://iamge/20251030022847441114matting-img_00001_.png","prompt":""},"action":"中文","design_type":"A4","ua":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/5"} + +-- !sql4 -- +["","123","12345","2025/10/30 10:35:25","24","3","67890","A4","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/5","https://iamge/20251030022847441114matting-img_00001_.png","text1","text2","text3","中文"] + diff --git a/regression-test/suites/external_table_p0/es/test_es_flatten_type.groovy b/regression-test/suites/external_table_p0/es/test_es_flatten_type.groovy new file mode 100644 index 00000000000000..7428dd386bcca1 --- /dev/null +++ b/regression-test/suites/external_table_p0/es/test_es_flatten_type.groovy @@ -0,0 
+1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_es_flatten_type", "p0,external,es,external_docker,external_docker_es") {
+    // The suite body only runs when the regression config sets enableEsTest=true.
+    String enabled = context.config.otherConfigs.get("enableEsTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String es_7_port = context.config.otherConfigs.get("es_7_port")
+
+        // Drop exactly the catalogs this suite creates, so that a rerun (or a
+        // previously aborted run) does not fail on an already existing catalog.
+        sql """drop catalog if exists test_es_query_es7_false;"""
+        sql """drop catalog if exists test_es_query_es7_true;"""
+
+        // ES 7 catalog with enable_docvalue_scan = false.
+        sql """create catalog test_es_query_es7_false properties(
+            "type"="es",
+            "hosts"="http://${externalEnvIp}:$es_7_port",
+            "nodes_discovery"="false",
+            "enable_keyword_sniff"="true",
+            "enable_docvalue_scan" = "false"
+        );
+        """
+
+        // ES 7 catalog with enable_docvalue_scan = true.
+        sql """create catalog test_es_query_es7_true properties(
+            "type"="es",
+            "hosts"="http://${externalEnvIp}:$es_7_port",
+            "nodes_discovery"="false",
+            "enable_keyword_sniff"="true",
+            "enable_docvalue_scan" = "true"
+        );
+        """
+
+        // sql1/sql3 read test_flatten through the "false" catalog, sql2/sql4 through the "true" one.
+        order_qt_sql1 "select * from test_es_query_es7_false.default_db.test_flatten";
+        order_qt_sql2 "select * from test_es_query_es7_true.default_db.test_flatten";
+        order_qt_sql3 "select extra from test_es_query_es7_false.default_db.test_flatten";
+        order_qt_sql4 "select extra from test_es_query_es7_true.default_db.test_flatten";
+
+        // Clean up so the catalogs do not leak into other suites.
+        sql """drop catalog if exists test_es_query_es7_false;"""
+        sql """drop catalog if exists test_es_query_es7_true;"""
+    }
+}