diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 63a199199a0d10..850ac5766fc119 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -247,4 +247,39 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
     return Status::OK();
 }
 
+// Parses `slice` once, then repeats the parsed value so that `rows` rows in total are
+// appended to `column`. `num_deserialized` receives the number of appended rows.
+Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    *num_deserialized = 0;
+    if (rows < 1) {
+        // Nothing was requested: skip parsing, which would otherwise still append one row.
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        return st;
+    }
+
+    DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` copies of the last element of `column`.
+void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(
+        IColumn& column, int times) const {
+    auto& col = static_cast<ColumnVector<UInt64>&>(column);
+    auto sz = col.size();
+    if (times < 1 || sz == 0) {
+        // A non-positive count must not reach insert_many_vals (a negative int would
+        // presumably wrap when converted to an unsigned count); an empty column has no
+        // last element to repeat.
+        return;
+    }
+    UInt64 val = col.get_element(sz - 1);
+    col.insert_many_vals(val, times);
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
index 00b05f5fcd6230..ef4aa6843a068c 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
@@ -77,6 +77,11 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<UInt64> {
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override;
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index eb9122dd2408f3..f2d595b87c452f 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -175,5 +175,40 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con
     return Status::OK();
 }
 
+// Parses `slice` once, then repeats the parsed value so that `rows` rows in total are
+// appended to `column`. `num_deserialized` receives the number of appended rows.
+Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    *num_deserialized = 0;
+    if (rows < 1) {
+        // Nothing was requested: skip parsing, which would otherwise still append one row.
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        return st;
+    }
+
+    DataTypeDateV2SerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` copies of the last element of `column`.
+void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(
+        IColumn& column, int times) const {
+    auto& col = static_cast<ColumnVector<UInt32>&>(column);
+    auto sz = col.size();
+    if (times < 1 || sz == 0) {
+        // A non-positive count must not reach insert_many_vals (a negative int would
+        // presumably wrap when converted to an unsigned count); an empty column has no
+        // last element to repeat.
+        return;
+    }
+    UInt32 val = col.get_element(sz - 1);
+    col.insert_many_vals(val, times);
+}
+
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h
index 9a8b050eeba4a6..52e4cec364ebb6 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h
@@ -74,6 +74,12 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe<UInt32> {
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index a59fdedbfe6991..e979211d6d720b 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -275,6 +275,43 @@ Status DataTypeDecimalSerDe<T>::write_column_to_orc(const std::string& timezone,
     }
     return Status::OK();
 }
+// Parses `slice` once, then repeats the parsed value so that `rows` rows in total are
+// appended to `column`. `num_deserialized` receives the number of appended rows.
+template <typename T>
+Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    *num_deserialized = 0;
+    if (rows < 1) {
+        // Nothing was requested: skip parsing, which would otherwise still append one row.
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        return st;
+    }
+
+    DataTypeDecimalSerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` copies of the last element of `column`.
+template <typename T>
+void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(
+        IColumn& column, int times) const {
+    auto& col = static_cast<ColumnDecimal<T>&>(column);
+    auto sz = col.size();
+    if (times < 1 || sz == 0) {
+        // Nothing to repeat; in particular an empty column has no last element to read.
+        return;
+    }
+
+    T val = col.get_element(sz - 1);
+    for (int i = 0; i < times; i++) {
+        col.insert_value(val);
+    }
+}
 
 template class DataTypeDecimalSerDe<Decimal32>;
 template class DataTypeDecimalSerDe<Decimal64>;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 55e68699f01b13..484c6686bc58f8 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -114,6 +114,12 @@ class DataTypeDecimalSerDe : public DataTypeSerDe {
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override;
+
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index faa3c8eb1f45f7..98ff1eb7f81b9b 100644
--- 
a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -127,6 +127,37 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
     return Status::OK();
 }
 
+// Parses `slice` once into the nullable `column`, then repeats that cell (null flag and
+// nested value) so that `rows` rows in total are appended.
+Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    *num_deserialized = 0;
+    if (rows < 1) {
+        // Nothing was requested: skip parsing, which would otherwise still append one row.
+        return Status::OK();
+    }
+    auto& col = static_cast<ColumnNullable&>(column);
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        return st;
+    }
+    if (rows > 1) {
+        auto& null_map = col.get_null_map_data();
+        auto& nested_column = col.get_nested_column();
+
+        // Extend relative to the current size: `column` may already hold earlier rows, in
+        // which case resizing the null map to `rows` would leave it out of step with the
+        // nested column. The flag is copied first so the fill value cannot alias storage
+        // that resize_fill may reallocate.
+        const auto null_flag = null_map.back();
+        null_map.resize_fill(null_map.size() + rows - 1, null_flag);
+        nested_serde->insert_column_last_value_multiple_times(nested_column, rows - 1);
+    }
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
 Status DataTypeNullableSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                              const FormatOptions& options) const {
     auto& null_column = assert_cast<ColumnNullable&>(column);
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 09d2fbde409acb..7b4841dcbdfd71 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -47,6 +47,9 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
                                                int* num_deserialized,
                                                const FormatOptions& options) const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override;
     Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
             int hive_text_complex_type_delimiter_level = 1) const override;
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 
0ba338ce39909f..299779ea267961 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -224,6 +224,42 @@ void DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,
     const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start;
     col_data.insert(raw_data, raw_data + row_count);
 }
+// Parses `slice` once, then repeats the parsed value so that `rows` rows in total are
+// appended to `column`. `num_deserialized` receives the number of appended rows.
+template <typename T>
+Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
+        IColumn& column, Slice& slice, int rows, int* num_deserialized,
+        const FormatOptions& options) const {
+    *num_deserialized = 0;
+    if (rows < 1) {
+        // Nothing was requested: skip parsing, which would otherwise still append one row.
+        return Status::OK();
+    }
+    Status st = deserialize_one_cell_from_json(column, slice, options);
+    if (!st.ok()) {
+        return st;
+    }
+
+    DataTypeNumberSerDe::insert_column_last_value_multiple_times(column, rows - 1);
+    *num_deserialized = rows;
+    return Status::OK();
+}
+
+// Appends `times` copies of the last element of `column`.
+template <typename T>
+void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(
+        IColumn& column, int times) const {
+    auto& col = static_cast<ColumnVector<T>&>(column);
+    auto sz = col.size();
+    if (times < 1 || sz == 0) {
+        // A non-positive count must not reach insert_many_vals (a negative int would
+        // presumably wrap when converted to an unsigned count); an empty column has no
+        // last element to repeat.
+        return;
+    }
+    T val = col.get_element(sz - 1);
+    col.insert_many_vals(val, times);
+}
 
 template <typename T>
 template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h
index c66bc994605115..18ba2fb26c79b5 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -70,6 +70,12 @@ class DataTypeNumberSerDe : public DataTypeSerDe {
                                                int* num_deserialized,
                                                const FormatOptions& options) const override;
 
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override;
+
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override;
+
     Status write_column_to_pb(const IColumn& column, PValues& result, int start,
                               int end) const override;
     Status read_column_from_pb(IColumn& column, const PValues& arg) const override;
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index 77663e1d43a2a8..1f6e24aef3ffff 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -234,6 +234,37 @@ class DataTypeSerDe {
     virtual Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices,
                                                        int* num_deserialized,
                                                        const FormatOptions& options) const = 0;
+    // Deserialize one JSON value and append it to `column` `rows` times in total.
+    // `num_deserialized` receives the appended row count (0 on failure or when rows < 1).
+    virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                                      int* num_deserialized,
+                                                      const FormatOptions& options) const {
+        *num_deserialized = 0;
+        if (rows < 1) {
+            // Nothing was requested: skip parsing, which would otherwise still append one row.
+            return Status::OK();
+        }
+        Status st = deserialize_one_cell_from_json(column, slice, options);
+        if (!st.ok()) {
+            return st;
+        }
+        insert_column_last_value_multiple_times(column, rows - 1);
+        *num_deserialized = rows;
+        return Status::OK();
+    }
+    // Append `times` copies of the last value of `column` to its end.
+    virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const {
+        if (times < 1 || column.size() == 0) {
+            // Nothing to repeat; an empty column has no last value at `size() - 1`.
+            return;
+        }
+        // The last value is first copied into a scratch column. Per the original author's
+        // note, simplifying this to `column.insert_many_from(column, column.size() - 1, times)`
+        // (the column as its own source) is likely to produce incorrect data.
+        MutableColumnPtr dum_col = column.clone_empty();
+        dum_col->insert_from(column, column.size() - 1);
+        column.insert_many_from(*dum_col.get(), 0, times);
+    }
 
     virtual Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index b74b585708623f..0f0f1d0dfe88ea 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -132,6 +132,46 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
         }
         return Status::OK();
     }
+
+    // Parses `slice` once, then repeats the parsed string so that `rows` rows in total
+    // are appended to `column`.
+    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
+                                              int* num_deserialized,
+                                              const FormatOptions& options) const override {
+        *num_deserialized = 0;
+        if (rows < 1) {
+            // Nothing was requested: skip parsing, which would otherwise still append one row.
+            return Status::OK();
+        }
+        Status st = deserialize_one_cell_from_json(column, slice, options);
+        if (!st.ok()) {
+            return st;
+        }
+
+        DataTypeStringSerDeBase::insert_column_last_value_multiple_times(column, rows - 1);
+        *num_deserialized = rows;
+        return Status::OK();
+    }
+
+    // Appends `times` copies of the last string of `column`.
+    void insert_column_last_value_multiple_times(IColumn& column, int times) const override {
+        auto& col = static_cast<ColumnType&>(column);
+        auto sz = col.size();
+        if (times < 1 || sz == 0) {
+            // A negative count must not become the length of `refs` below, and an empty
+            // column has no last string to repeat.
+            return;
+        }
+
+        // The bytes are copied into `str` before inserting, presumably because `ref`
+        // points into `col`, which is modified by the insert.
+        StringRef ref = col.get_data_at(sz - 1);
+        String str(ref.data, ref.size);
+        std::vector<StringRef> refs(times, {str.data(), str.size()});
+
+        col.insert_many_strings(refs.data(), refs.size());
+    }
+
     Status read_column_from_pb(IColumn& column, const PValues& arg) const override {
         auto& column_dest = assert_cast<ColumnType&>(column);
         column_dest.reserve(column_dest.size() + arg.string_value_size());
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 16909f0023ae11..54d94dcecc7194 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -935,13 +935,10 @@ Status OrcReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized,
-                                                             _text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
+                                                            &num_deserialized,
+                                                            _text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: {}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 5e824f34817c1f..9ec1235be1d22b 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -631,13 +631,10 @@ Status RowGroupReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized,
-                                                             _text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
+                                                            &num_deserialized,
+                                                            _text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: {}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp
index dcfb404ae5a5b3..1738c3fac041b6 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -495,13 +495,10 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) {
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
-        vector<Slice> slices(rows);
-        for (int i = 0; i < rows; i++) {
-            slices[i] = {value.data(), value.size()};
-        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized,
-                                                             _text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
+                                                            &num_deserialized,
+                                                            _text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: {}={}",
                                          slot_desc->col_name(), value);
         }