Commit e30c3f3

[fix](csv_reader) fix bug where reading garbled files caused BE crash (#24164)

Fix a bug where reading garbled files caused the BE to crash.
hubgeter authored Sep 13, 2023
1 parent 9916324 commit e30c3f3
Showing 6 changed files with 67 additions and 7 deletions.
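
The crash comes from the Hive text map-parsing path: on a garbled field the splitting loop could emit a key/value range whose delimiter position (kv) lags behind the pair start (from), or whose end index is computed as i - 1 while i is 0. Those positions feed unsigned length computations such as kv - from and i - 1 - kv, which wrap around to enormous values, and the resulting Slice then reads far outside the buffer. The sketch below is a simplified, self-contained illustration of the pre-fix splitting loop (names and delimiter handling are condensed; it is not the actual Doris code), with '\002' standing for the Hive collection delimiter and '\003' for the map key/value delimiter:

// Simplified illustration of the pre-fix splitting loop; not the actual Doris code.
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Range { size_t from, kv, end; };

static std::vector<Range> split_pairs_old(const std::string& data) {
    std::vector<Range> ranges;
    const size_t len = data.size();
    size_t from = 0, kv = 0;
    for (size_t i = 0; i <= len; i++) {
        if (i < len && data[i] == '\003' && kv == from) {
            kv = i;                               // position of this pair's key/value delimiter
            continue;
        }
        if (i == len || data[i] == '\002') {      // old code: no "i >= kv + 1" guard here
            ranges.push_back({from, kv, i - 1});  // at i == 0 the end index wraps to SIZE_MAX
            from = i + 1;                         // old code: kv is not realigned with from
        }
    }
    return ranges;
}

int main() {
    // Garbled field: an entry delimiter with no key/value delimiter in front of it.
    for (const Range& r : split_pairs_old(std::string("\002ab"))) {
        size_t key_len = r.kv - r.from;  // wraps when kv lags behind from
        size_t val_len = r.end - r.kv;   // wraps when the end index itself wrapped
        std::printf("from=%zu kv=%zu end=%zu key_len=%zu value_len=%zu\n",
                    r.from, r.kv, r.end, key_len, val_len);
    }
    return 0;
}

Running this prints key/value lengths near SIZE_MAX for the garbled field, which is the length the serde would then try to copy.
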
3 changes: 2 additions & 1 deletion be/src/exec/text_converter.cpp
@@ -330,9 +330,10 @@ bool TextConverter::_write_data(const TypeDescriptor& type_desc,
                 kv = i;
                 continue;
             }
-            if (i == len || data[i] == _collection_delimiter) {
+            if ((i == len || data[i] == _collection_delimiter) && i >= kv + 1) {
                 ranges.push_back({from, kv, i - 1});
                 from = i + 1;
+                kv = from;
             }
         }

9 changes: 7 additions & 2 deletions be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -71,7 +71,9 @@ Status DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column,

 Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                           const FormatOptions& options) const {
-    DCHECK(!slice.empty());
+    if (slice.empty()) {
+        return Status::InvalidArgument("slice is empty!");
+    }
     auto& array_column = assert_cast<ColumnArray&>(column);
     auto& offsets = array_column.get_offsets();
     IColumn& nested_column = array_column.get_data();
@@ -132,6 +134,9 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
 Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice,
                                                                const FormatOptions& options,
                                                                int nesting_level) const {
+    if (slice.empty()) {
+        return Status::InvalidArgument("slice is empty!");
+    }
     auto& array_column = assert_cast<ColumnArray&>(column);
     auto& offsets = array_column.get_offsets();
     IColumn& nested_column = array_column.get_data();
@@ -303,4 +308,4 @@ Status DataTypeArraySerDe::write_column_to_mysql(const IColumn& column,
 }

 } // namespace vectorized
-} // namespace doris
\ No newline at end of file
+} // namespace doris
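
The other half of the fix replaces DCHECK(!slice.empty()) with a real runtime check. DCHECK only fires in debug builds; in a release BE an empty cell produced by a garbled file sails past the assertion and is parsed anyway. A minimal sketch of the pattern, using stand-in Status and Slice types rather than the real Doris classes:

// Stand-in types to show the DCHECK-vs-runtime-check pattern; not the Doris API.
#include <cstddef>
#include <string>
#include <utility>

struct Status {
    bool ok;
    std::string msg;
    static Status OK() { return {true, ""}; }
    static Status InvalidArgument(std::string m) { return {false, std::move(m)}; }
};

struct Slice {
    const char* data;
    size_t size;
    bool empty() const { return size == 0; }
};

// Before the fix the precondition was DCHECK(!slice.empty()), which is compiled
// out of release builds, so an empty cell from a garbled file reached the
// parsing code below. After the fix, bad input is rejected up front.
Status deserialize_one_cell(const Slice& slice) {
    if (slice.empty()) {
        return Status::InvalidArgument("slice is empty!");
    }
    // ... parse the non-empty cell into the destination column ...
    return Status::OK();
}

Returning an error status lets the caller report the bad row instead of taking down the process.
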
11 changes: 8 additions & 3 deletions be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -65,7 +65,9 @@ void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row
 Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice,
                                                              const FormatOptions& options,
                                                              int nesting_level) const {
-    DCHECK(!slice.empty());
+    if (slice.empty()) {
+        return Status::InvalidArgument("slice is empty!");
+    }
     auto& array_column = assert_cast<ColumnMap&>(column);
     auto& offsets = array_column.get_offsets();
     IColumn& nested_key_column = array_column.get_keys();
@@ -92,10 +94,11 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Sl
                 kv = i;
                 continue;
             }
-            if (i == slice.size || slice[i] == collection_delimiter) {
+            if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv + 1) {
                 key_slices.push_back({slice.data + from, kv - from});
                 value_slices.push_back({slice.data + kv + 1, i - 1 - kv});
                 from = i + 1;
+                kv = from;
             }
         }

@@ -169,7 +172,9 @@ Status DataTypeMapSerDe::deserialize_column_from_json_vector(IColumn& column,

 Status DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                         const FormatOptions& options) const {
-    DCHECK(!slice.empty());
+    if (slice.empty()) {
+        return Status::InvalidArgument("slice is empty!");
+    }
     auto& array_column = assert_cast<ColumnMap&>(column);
     auto& offsets = array_column.get_offsets();
     IColumn& nested_key_column = array_column.get_keys();
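
With the i >= kv + 1 guard and the kv = from reset added in the hive-text path above, the same simplified loop from the earlier sketch can no longer emit ranges whose lengths wrap: a pair is only pushed once the scan position is past kv, and kv is realigned with the start of the next pair. A sketch of the fixed splitting logic, under the same simplifications as before:

// Same simplified model as the earlier sketch, now with the two fixes applied;
// still illustrative, not the actual Doris code.
#include <cstddef>
#include <string>
#include <vector>

struct Range { size_t from, kv, end; };

static std::vector<Range> split_pairs_fixed(const std::string& data) {
    std::vector<Range> ranges;
    const size_t len = data.size();
    size_t from = 0, kv = 0;
    for (size_t i = 0; i <= len; i++) {
        if (i < len && data[i] == '\003' && kv == from) {
            kv = i;
            continue;
        }
        // Guard: only emit a pair once i is past kv, so kv - from and
        // i - 1 - kv can no longer wrap around.
        if ((i == len || data[i] == '\002') && i >= kv + 1) {
            ranges.push_back({from, kv, i - 1});
            from = i + 1;
            kv = from;  // realign kv with the start of the next pair
        }
    }
    return ranges;
}

// On the garbled field "\002ab" this yields a single {0, 0, 2} range
// (empty key, value "ab") instead of ranges with wrapped-around lengths.
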
5 changes: 4 additions & 1 deletion be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -44,6 +44,9 @@ void DataTypeStructSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWr
 Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice,
                                                                 const FormatOptions& options,
                                                                 int nesting_level) const {
+    if (slice.empty()) {
+        return Status::InvalidArgument("slice is empty!");
+    }
     char struct_delimiter = options.get_collection_delimiter(nesting_level);

     std::vector<Slice> slices;
@@ -190,4 +193,4 @@ Status DataTypeStructSerDe::write_column_to_mysql(const IColumn& column,
 }

 } // namespace vectorized
-} // namespace doris
\ No newline at end of file
+} // namespace doris
Binary file not shown.
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_text_garbled_file", "p2,external,hive,external_remote,external_remote_hive") {
//test hive garbled files , prevent be hanged

String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_text_garbled_file"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)


order_qt_garbled_file """
select * from ${catalog_name}.multi_catalog.test_csv_format_error;
"""


}
}
