diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 58d9d177d1c331..5953ec1319ccdd 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -248,7 +248,9 @@ Status NewJsonReader::get_columns(std::unordered_map* col_names, std::vector* col_types) { RETURN_IF_ERROR(_get_range_params()); - + // create decompressor. + // _decompressor may be nullptr if this is not a compressed file + RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor)); RETURN_IF_ERROR(_open_file_reader(true)); if (_read_json_by_line) { RETURN_IF_ERROR(_open_line_reader()); diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz new file mode 100644 index 00000000000000..8a6db90241ffc2 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/json_format_test/simple_object_json.json.gz differ diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out index a8f5dcf5396932..04ec58cdbaefc6 100644 --- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out @@ -286,6 +286,20 @@ 8 chengdu 2345678 9 xian 2345679 +-- !json_compressed -- +1 beijing 2345671 +10 hefei 23456710 +11 \N 23456711 +12 hefei \N +2 shanghai 2345672 +3 guangzhou 2345673 +4 shenzhen 2345674 +5 hangzhou 2345675 +6 nanjing 2345676 +7 wuhan 2345677 +8 chengdu 2345678 +9 xian 2345679 + -- !json_limit1 -- 1 beijing 2345671 10 hefei 23456710 diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy index 74cb1e320aaa16..8bc8194843d9c6 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy @@ -143,6 +143,16 @@ suite("test_hdfs_tvf","external,hive,tvf,external_docker") { "strip_outer_array" = "false", "read_json_by_line" = "true") order by id; """ + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json.gz" + format = "json" + qt_json_compressed """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "compress_type" = "GZ", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by id; """ + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json" format = "json"