diff --git a/be/src/vec/exec/format/text/text_reader.cpp b/be/src/vec/exec/format/text/text_reader.cpp index 7913a9bdb2bcdf..ab7f92f489f0cf 100644 --- a/be/src/vec/exec/format/text/text_reader.cpp +++ b/be/src/vec/exec/format/text/text_reader.cpp @@ -165,11 +165,9 @@ Status TextReader::_validate_line(const Slice& line, bool* success) { Status TextReader::_deserialize_nullable_string(IColumn& column, Slice& slice) { auto& null_column = assert_cast(column); - if (_options.null_len > 0) { - if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) { - null_column.insert_data(nullptr, 0); - return Status::OK(); - } + if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) { + null_column.insert_data(nullptr, 0); + return Status::OK(); } static DataTypeStringSerDe stringSerDe; auto st = stringSerDe.deserialize_one_cell_from_hive_text(null_column.get_nested_column(), diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql index 0368547f8be224..81bdf03da8e6c4 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql @@ -170,3 +170,33 @@ INSERT INTO TABLE test_open_csv_standard_prop VALUES INSERT INTO TABLE test_open_csv_custom_prop VALUES (1, 'John Doe', 28, 50000.75, true, '2022-01-15', '2023-10-21 14:30:00', 4.5, 'Senior Developer'), (2, 'Jane,Smith', NULL, NULL, false, '2020-05-20', NULL, NULL, '\"Project Manager\"'); + +CREATE TABLE test_empty_null_format_text ( + id INT, + name STRING +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +STORED AS TEXTFILE +TBLPROPERTIES ( + "serialization.null.format"="" +); + +INSERT INTO TABLE test_empty_null_format_text VALUES + (1, 'Alice'), + (2, NULL), + (3, ''); + +CREATE TABLE test_empty_null_defined_text ( + id INT, + name STRING +) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +NULL DEFINED AS '' +STORED AS TEXTFILE; + +INSERT INTO TABLE test_empty_null_defined_text VALUES + (1, 'Alice'), + (2, NULL), + (3, ''); \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java index 9874c18c5b3f9c..cf3f02a25f3fe8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreClientHelper.java @@ -863,7 +863,7 @@ public static Optional getSerdeProperty(Table table, String key) { private static Optional firstNonNullable(String... values) { for (String value : values) { - if (!Strings.isNullOrEmpty(value)) { + if (value != null) { return Optional.of(value); } } @@ -884,8 +884,10 @@ public static String firstPresentOrDefault(String defaultValue, Optional * * @param altValue * The string containing a number. + * @param defValue + * The default value to return if altValue is invalid. */ - public static String getByte(String altValue) { + public static String getByte(String altValue, String defValue) { if (altValue != null && altValue.length() > 0) { try { return Character.toString((char) ((Byte.parseByte(altValue) + 256) % 256)); @@ -893,6 +895,6 @@ public static String getByte(String altValue) { return altValue.substring(0, 1); } } - return null; + return defValue; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java index 1be78e41b89b88..36c147da142e75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveProperties.java @@ -89,8 +89,8 @@ public static String getFieldDelimiter(Table table, boolean supportMultiChar) { Optional fieldDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_FIELD_DELIMITER); Optional serFormat = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_SERIALIZATION_FORMAT); String delimiter = HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_FIELD_DELIMITER, fieldDelim, serFormat); - return supportMultiChar ? delimiter : HiveMetaStoreClientHelper.getByte(delimiter); + "", fieldDelim, serFormat); + return supportMultiChar ? delimiter : HiveMetaStoreClientHelper.getByte(delimiter, DEFAULT_FIELD_DELIMITER); } public static String getSeparatorChar(Table table) { @@ -102,13 +102,13 @@ public static String getSeparatorChar(Table table) { public static String getLineDelimiter(Table table) { Optional lineDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_LINE_DELIMITER); return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_LINE_DELIMITER, lineDelim)); + "", lineDelim), DEFAULT_LINE_DELIMITER); } public static String getMapKvDelimiter(Table table) { Optional mapkvDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_MAP_KV_DELIMITER); return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_MAP_KV_DELIMITER, mapkvDelim)); + "", mapkvDelim), DEFAULT_MAP_KV_DELIMITER); } public static String getCollectionDelimiter(Table table) { @@ -117,18 +117,13 @@ public static String getCollectionDelimiter(Table table) { Optional collectionDelimHive3 = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_COLLECTION_DELIMITER_HIVE3); return HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault( - DEFAULT_COLLECTION_DELIMITER, collectionDelimHive2, collectionDelimHive3)); + "", collectionDelimHive2, collectionDelimHive3), DEFAULT_COLLECTION_DELIMITER); } public static Optional getEscapeDelimiter(Table table) { Optional escapeDelim = HiveMetaStoreClientHelper.getSerdeProperty(table, PROP_ESCAPE_DELIMITER); if (escapeDelim.isPresent()) { - String escape = HiveMetaStoreClientHelper.getByte(escapeDelim.get()); - if (escape != null) { - return Optional.of(escape); - } else { - return Optional.of(DEFAULT_ESCAPE_DELIMIER); - } + return Optional.of(HiveMetaStoreClientHelper.getByte(escapeDelim.get(), DEFAULT_ESCAPE_DELIMIER)); } return Optional.empty(); } diff --git a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out index c2415c058f14f1..cda92c0519ad51 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out +++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out @@ -51,6 +51,28 @@ b 2.2 1 John Doe 28 50000.75 TRUE 2022-01-15 2023-10-21 14:30:00 4.5 Senior Developer 2 Jane,Smith 2020-05-20 "Project Manager" +-- !test_empty_null_format_text -- +1 Alice +2 \N +3 \N + +-- !test_empty_null_format_text2 -- +2 \N +3 \N + +-- !test_empty_null_format_text3 -- + +-- !test_empty_null_defined_text -- +1 Alice +2 \N +3 \N + +-- !test_empty_null_defined_text2 -- +2 \N +3 \N + +-- !test_empty_null_defined_text3 -- + -- !1 -- a 1.1 b 2.2 @@ -103,3 +125,25 @@ b 2.2 1 John Doe 28 50000.75 TRUE 2022-01-15 2023-10-21 14:30:00 4.5 Senior Developer 2 Jane,Smith FALSE 2020-05-20 "Project Manager" +-- !test_empty_null_format_text -- +1 Alice +2 \N +3 \N + +-- !test_empty_null_format_text2 -- +2 \N +3 \N + +-- !test_empty_null_format_text3 -- + +-- !test_empty_null_defined_text -- +1 Alice +2 \N +3 \N + +-- !test_empty_null_defined_text2 -- +2 \N +3 \N + +-- !test_empty_null_defined_text3 -- + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy index 52cdd25eb07b2c..d4bb051214d724 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy @@ -55,6 +55,14 @@ suite("test_hive_serde_prop", "external_docker,hive,external_docker_hive,p0,exte qt_test_open_csv_default_prop """select * from ${catalog_name}.regression.test_open_csv_default_prop order by id;""" qt_test_open_csv_standard_prop """select * from ${catalog_name}.regression.test_open_csv_standard_prop order by id;""" qt_test_open_csv_custom_prop """select * from ${catalog_name}.regression.test_open_csv_custom_prop order by id;""" + + qt_test_empty_null_format_text """select * from ${catalog_name}.regression.test_empty_null_format_text order by id;""" + qt_test_empty_null_format_text2 """select * from ${catalog_name}.regression.test_empty_null_format_text where name is null order by id;""" + qt_test_empty_null_format_text3 """select * from ${catalog_name}.regression.test_empty_null_format_text where name = '' order by id;""" + + qt_test_empty_null_defined_text """select * from ${catalog_name}.regression.test_empty_null_defined_text order by id;""" + qt_test_empty_null_defined_text2 """select * from ${catalog_name}.regression.test_empty_null_defined_text where name is null order by id;""" + qt_test_empty_null_defined_text3 """select * from ${catalog_name}.regression.test_empty_null_defined_text where name = '' order by id;""" } }