diff --git a/be/src/util/slice.h b/be/src/util/slice.h
index 57865b50e3e65d..84aec06c400aba 100644
--- a/be/src/util/slice.h
+++ b/be/src/util/slice.h
@@ -119,6 +119,51 @@ struct Slice {
         size -= n;
     }
 
+    /// Drop the last "n" bytes from this slice.
+    ///
+    /// @pre n <= size
+    ///
+    /// @note Only the bounds of the slice are changed;
+    ///   the data is not modified.
+    ///
+    /// @param [in] n
+    ///   Number of bytes that should be dropped from the end.
+    void remove_suffix(size_t n) {
+        assert(n <= size);
+        size -= n;
+    }
+
+    /// Remove leading spaces (' ' only; other whitespace is kept).
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    void trim_prefix() {
+        while (size > 0 && data[0] == ' ') {
+            data += 1;
+            size -= 1;
+        }
+    }
+
+    /// Remove one pair of matching quote chars ('"' or '\'') when they are
+    /// the first and the last char of the slice.
+    ///
+    /// The slice is left untouched when it is shorter than two bytes or when
+    /// its first and last char are not the same quote char. An empty quoted
+    /// value ("" or '') becomes an empty slice.
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    void trim_quote() {
+        if (size < 2) {
+            return;
+        }
+        const auto first = data[0];
+        if ((first == '"' || first == '\'') && data[size - 1] == first) {
+            data += 1;
+            size -= 2;
+        }
+    }
+
     /// Truncate the slice to the given number of bytes.
/// /// @pre n <= size diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 93cc45414df648..9397ab6a5b822e 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -32,6 +32,116 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeArraySerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeArraySerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + auto& data_column = assert_cast(*ptr); + auto& offsets = data_column.get_offsets(); + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn& nested_column = data_column.get_data(); + // bool is_nested_string = remove_nullable(nested_column.get_ptr())->is_column_string(); + + bw.write("[", 1); + // nested column field delim should be replaced as collection delim because this field is in array. + // add ' ' to keep same with origin format with array + options.field_delim = options.collection_delim; + options.field_delim += " "; + nested_serde->serialize_column_to_text(nested_column, offset, next_offset, bw, options); + bw.write("]", 1); +} + +Status DataTypeArraySerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DCHECK(!slices.empty()); + int end = num_deserialized && *num_deserialized > 0 ? 
*num_deserialized : slices.size(); + + for (int i = 0; i < end; ++i) { + if (Status st = deserialize_one_cell_from_text(column, slices[i], options); + st != Status::OK()) { + *num_deserialized = i + 1; + return st; + } + } + return Status::OK(); +} + +Status DataTypeArraySerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + DCHECK(!slice.empty()); + auto& array_column = assert_cast(column); + auto& offsets = array_column.get_offsets(); + IColumn& nested_column = array_column.get_data(); + DCHECK(nested_column.is_nullable()); + if (slice[0] != '[') { + return Status::InvalidArgument("Array does not start with '[' character, found '{}'", + slice[0]); + } + if (slice[slice.size - 1] != ']') { + return Status::InvalidArgument("Array does not end with ']' character, found '{}'", + slice[slice.size - 1]); + } + // empty array [] + if (slice.size == 2) { + offsets.push_back(offsets.back()); + return Status::OK(); + } + slice.remove_prefix(1); + slice.remove_suffix(1); + + // deserialize array column from text we have to know how to split from text and support nested + // complex type. + // 1. get item according to collection_delimiter, but if meet collection_delimiter in string, we should ignore it. + // 2. keep a nested level to support nested complex type. + int nested_level = 0; + bool has_quote = false; + std::vector slices; + slice.trim_prefix(); + slices.emplace_back(slice); + size_t slice_size = slice.size; + // pre add total slice can reduce lasted element check. + for (int idx = 0; idx < slice_size; ++idx) { + char c = slice[idx]; + if (c == '"' || c == '\'') { + has_quote = !has_quote; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == options.collection_delim) { + // if meet collection_delimiter and not in quote, we can make it as an item. 
+ slices.back().remove_suffix(slice_size - idx); + // add next total slice.(slice data will not change, so we can use slice directly) + // skip delimiter + Slice next(slice.data + idx + 1, slice_size - idx - 1); + next.trim_prefix(); + if (options.converted_from_string) slices.back().trim_quote(); + slices.emplace_back(next); + } + } + + if (options.converted_from_string) slices.back().trim_quote(); + + int elem_deserialized = 0; + Status st = nested_serde->deserialize_column_from_text_vector(nested_column, slices, + &elem_deserialized, options); + offsets.emplace_back(offsets.back() + elem_deserialized); + return st; +} + void DataTypeArraySerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const { diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 37516b45b03c34..222564de0ecbbf 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -38,12 +38,26 @@ class DataTypeArraySerDe : public DataTypeSerDe { public: DataTypeArraySerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write array column to pb"; + throw 
doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to array"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 01988b50bde95b..3a36aad612e243 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -33,6 +33,29 @@ class Arena; class DataTypeBitMapSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& 
column, const PValues& arg) const override; @@ -44,11 +67,13 @@ class DataTypeBitMapSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write bitmap column to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read bitmap column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index 6133d73b08e2a7..d6a8986b7f4b00 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -27,6 +27,138 @@ namespace doris { namespace vectorized { +void DataTypeDate64SerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT(); +} + +void DataTypeDate64SerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + Int64 int_val = assert_cast(*ptr).get_element(row_num); + if (options.date_olap_format) { + tm time_tm; + memset(&time_tm, 0, sizeof(time_tm)); + time_tm.tm_mday = static_cast(int_val & 31); + time_tm.tm_mon = static_cast(int_val >> 5 & 15) - 1; + time_tm.tm_year = static_cast(int_val >> 9) - 1900; + char buf[20] = {'\0'}; + 
strftime(buf, sizeof(buf), "%Y-%m-%d", &time_tm); + std::string s = std::string(buf); + bw.write(s.c_str(), s.length()); + } else { + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); + + char buf[64]; + char* pos = value.to_string(buf); + bw.write(buf, pos - buf - 1); + } +} + +Status DataTypeDate64SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDate64SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast(column); + Int64 val = 0; + if (options.date_olap_format) { + tm time_tm; + char* res = strptime(slice.data, "%Y-%m-%d", &time_tm); + if (nullptr != res) { + val = (time_tm.tm_year + 1900) * 16 * 32 + (time_tm.tm_mon + 1) * 32 + time_tm.tm_mday; + } else { + // 1400 - 01 - 01 + val = 716833; + } + } else if (ReadBuffer rb(slice.data, slice.size); !read_date_text_impl(val, rb)) { + return Status::InvalidArgument("parse date fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + +void DataTypeDateTimeSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeDateTimeSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + Int64 int_val = assert_cast(*ptr).get_element(row_num); + if (options.date_olap_format) { + tm time_tm; + int64 part1 = (int_val / 1000000L); + int64 part2 = (int_val - part1 * 1000000L); + time_tm.tm_year = static_cast((part1 / 10000L) % 10000) - 1900; + 
time_tm.tm_mon = static_cast((part1 / 100) % 100) - 1; + time_tm.tm_mday = static_cast(part1 % 100); + + time_tm.tm_hour = static_cast((part2 / 10000L) % 10000); + time_tm.tm_min = static_cast((part2 / 100) % 100); + time_tm.tm_sec = static_cast(part2 % 100); + char buf[20] = {'\0'}; + strftime(buf, 20, "%Y-%m-%d %H:%M:%S", &time_tm); + std::string s = std::string(buf); + bw.write(s.c_str(), s.length()); + } else { + doris::vectorized::VecDateTimeValue value = + binary_cast(int_val); + + char buf[64]; + char* pos = value.to_string(buf); + bw.write(buf, pos - buf - 1); + } +} + +Status DataTypeDateTimeSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDateTimeSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast(column); + Int64 val = 0; + if (options.date_olap_format) { + tm time_tm; + char* res = strptime(slice.data, "%Y-%m-%d %H:%M:%S", &time_tm); + if (nullptr != res) { + val = ((time_tm.tm_year + 1900) * 10000L + (time_tm.tm_mon + 1) * 100L + + time_tm.tm_mday) * + 1000000L + + time_tm.tm_hour * 10000L + time_tm.tm_min * 100L + time_tm.tm_sec; + } else { + // 1400 - 01 - 01 + val = 14000101000000L; + } + } else if (ReadBuffer rb(slice.data, slice.size); !read_datetime_text_impl(val, rb)) { + return Status::InvalidArgument("parse datetime fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + void DataTypeDate64SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index 94a4618d5c54ea..0e11060c0b0f82 100644 --- 
a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -42,6 +42,17 @@ namespace vectorized { class Arena; class DataTypeDate64SerDe : public DataTypeNumberSerDe { + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; @@ -57,5 +68,19 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int row_idx, bool col_const) const; }; + +class DataTypeDateTimeSerDe : public DataTypeDate64SerDe { + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; +}; } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 
6800486dd3f66d..2205c21c6a89e9 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -27,6 +27,65 @@ namespace doris { namespace vectorized { +void DataTypeDateTimeV2SerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeDateTimeV2SerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + UInt64 int_val = assert_cast(*ptr).get_element(row_num); + DateV2Value val = + binary_cast>(int_val); + + if (options.date_olap_format) { + std::string format = "%Y-%m-%d %H:%i:%s.%f"; + char buf[30]; + val.to_format_string(format.c_str(), format.size(), buf); + std::string s = std::string(buf); + bw.write(s.c_str(), s.length()); + } else { + char buf[64]; + char* pos = val.to_string(buf); + bw.write(buf, pos - buf - 1); + } +} + +Status DataTypeDateTimeV2SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} +Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast(column); + UInt64 val = 0; + if (options.date_olap_format) { + doris::vectorized::DateV2Value datetimev2_value; + std::string date_format = "%Y-%m-%d %H:%i:%s.%f"; + if (datetimev2_value.from_date_format_str(date_format.data(), date_format.size(), + slice.data, slice.size)) { + val = datetimev2_value.to_date_int_val(); + } else { + val = doris::vectorized::MIN_DATETIME_V2; + } + + } else if (ReadBuffer rb(slice.data, slice.size); + 
!read_datetime_v2_text_impl(val, rb)) { + return Status::InvalidArgument("parse date fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + void DataTypeDateTimeV2SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index 0b943e60108a8e..f69924b25f4e1d 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -44,12 +44,26 @@ class Arena; class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { public: DataTypeDateTimeV2SerDe(int scale) : scale(scale) {}; + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "not support read arrow array to uint64 column"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, @@ -64,4 +78,4 @@ class 
DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { int scale; }; } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index a538ec01f42e9f..0025086abd25d2 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -27,6 +27,55 @@ namespace doris { namespace vectorized { +void DataTypeDateV2SerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeDateV2SerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + UInt32 int_val = assert_cast(*ptr).get_element(row_num); + DateV2Value val = binary_cast>(int_val); + + char buf[64]; + char* pos = val.to_string(buf); + // DateTime to_string the end is /0 + bw.write(buf, pos - buf - 1); +} + +Status DataTypeDateV2SerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeDateV2SerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast(column); + UInt32 val = 0; + if (options.date_olap_format) { + tm time_tm; + char* res = strptime(slice.data, "%Y-%m-%d", &time_tm); + if (nullptr != res) { + val = ((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) | time_tm.tm_mday; + } else { + val = doris::vectorized::MIN_DATE_V2; + } + } else if (ReadBuffer rb(slice.data, slice.size); !read_date_v2_text_impl(val, 
rb)) { + return Status::InvalidArgument("parse date fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + return Status::OK(); +} + void DataTypeDateV2SerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { @@ -91,4 +140,4 @@ Status DataTypeDateV2SerDe::write_column_to_mysql(const IColumn& column, } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 5d2baf770440c3..68be53dd1ad8ab 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -42,6 +42,18 @@ namespace vectorized { class Arena; class DataTypeDateV2SerDe : public DataTypeNumberSerDe { + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override; @@ -58,4 +70,4 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { int row_idx, bool col_const) const; }; } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 
5e46c996e745ef..f639a94e646537 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -31,6 +31,55 @@ namespace doris { namespace vectorized { +template +void DataTypeDecimalSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +template +void DataTypeDecimalSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + auto& col = assert_cast&>(*ptr); + if constexpr (!IsDecimalV2) { + T value = col.get_element(row_num); + auto decimal_str = value.to_string(scale); + bw.write(decimal_str.data(), decimal_str.size()); + } else { + auto length = col.get_element(row_num).to_string(buf, scale, scale_multiplier); + bw.write(buf, length); + } +} + +template +Status DataTypeDecimalSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +template +Status DataTypeDecimalSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast&>(column).get_data(); + T val = 0; + if (ReadBuffer rb(slice.data, slice.size); + !read_decimal_text_impl(val, rb, precision, scale)) { + return Status::InvalidArgument("parse decimal fail, string: '{}', primitive type: '{}'", + std::string(rb.position(), rb.count()).c_str(), + get_primitive_type()); + } + column_data.emplace_back(val); + return Status::OK(); +} + template void DataTypeDecimalSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, diff --git 
a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 4c64dc2ae3eedb..9ac2dacf6e17bc 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -67,6 +67,19 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { : scale(scale_), scale_multiplier(decimal_scale_multiplier(scale)) {} + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index 85c27e7c1cc5a7..aa357aeb806984 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -36,39 +36,71 @@ class Arena; class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw 
doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write FixedLengthObject column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to FixedLengthObject"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); }; void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override { - LOG(FATAL) << "Not support write FixedLengthObject column to jsonb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { - LOG(FATAL) << "Not support read from jsonb to FixedLengthObject"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write FixedLengthObject column to arrow"; + 
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read FixedLengthObject column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } }; } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index 72052d47cfa2c0..c0bbd89ce3a6ec 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -37,6 +37,40 @@ namespace doris { namespace vectorized { class IColumn; +void DataTypeHLLSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeHLLSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto col_row = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = col_row.first; + row_num = col_row.second; + auto& data = const_cast(assert_cast(*ptr).get_element(row_num)); + std::unique_ptr buf = 
std::make_unique(data.max_serialized_size()); + size_t size = data.serialize((uint8*)buf.get()); + bw.write(buf.get(), size); +} + +Status DataTypeHLLSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeHLLSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& data_column = assert_cast(column); + + HyperLogLog hyper_log_log(slice); + data_column.insert_value(hyper_log_log); + return Status::OK(); +} + Status DataTypeHLLSerDe::write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const { auto ptype = result.mutable_type(); @@ -136,4 +170,4 @@ Status DataTypeHLLSerDe::write_column_to_mysql(const IColumn& column, } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index 981b197ca8da92..46f90fc20c0fa0 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -33,6 +33,15 @@ class Arena; class DataTypeHLLSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; 
Status read_column_from_pb(IColumn& column, const PValues& arg) const override; @@ -46,7 +55,8 @@ class DataTypeHLLSerDe : public DataTypeSerDe { int end) const override; void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read hll column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index ff42a06c9d35dc..b2546b8aec8093 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -54,6 +54,43 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +void DataTypeJsonbSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeJsonbSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const StringRef& s = assert_cast(*ptr).get_data_at(row_num); + if (s.size > 0) { + bw.write(s.data, s.size); + } +} + +Status DataTypeJsonbSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeJsonbSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + JsonBinaryValue value; 
+ RETURN_IF_ERROR(value.from_json_string(slice.data, slice.size)); + + auto& column_string = assert_cast(column); + column_string.insert_data(value.value(), value.size()); + return Status::OK(); +} + void DataTypeJsonbSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const { @@ -74,4 +111,4 @@ void DataTypeJsonbSerDe::write_column_to_arrow(const IColumn& column, const Null } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 9bf523504c7b6d..032688312a9d6b 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -42,10 +42,22 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { arrow::ArrayBuilder* array_builder, int start, int end) const override; + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int row_idx, bool col_const) const; }; } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 5b7bb09147f5ae..afa64d954b1d8b 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp 
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -29,6 +29,164 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeMapSerDe::serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} +void DataTypeMapSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const ColumnMap& map_column = assert_cast(*ptr); + const ColumnArray::Offsets64& offsets = map_column.get_offsets(); + + size_t offset = offsets[row_num - 1]; + size_t next_offset = offsets[row_num]; + + const IColumn& nested_keys_column = map_column.get_keys(); + const IColumn& nested_values_column = map_column.get_values(); + bw.write("{", 1); + for (size_t i = offset; i < next_offset; ++i) { + if (i != offset) { + bw.write(&options.collection_delim, 1); + bw.write(" ", 1); + } + key_serde->serialize_one_cell_to_text(nested_keys_column, i, bw, options); + bw.write(&options.map_key_delim, 1); + value_serde->serialize_one_cell_to_text(nested_values_column, i, bw, options); + } + bw.write("}", 1); +} + +Status DataTypeMapSerDe::deserialize_column_from_text_vector(IColumn& column, + std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} +Status DataTypeMapSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + DCHECK(!slice.empty()); + auto& array_column = assert_cast(column); + auto& offsets = array_column.get_offsets(); + IColumn& nested_key_column = array_column.get_keys(); + IColumn& nested_val_column = array_column.get_values(); + DCHECK(nested_key_column.is_nullable()); + DCHECK(nested_val_column.is_nullable()); + if (slice[0] != 
'{') { + std::stringstream ss; + ss << slice[0] << '\''; + return Status::InvalidArgument("Map does not start with '{' character, found '" + ss.str()); + } + if (slice[slice.size - 1] != '}') { + std::stringstream ss; + ss << slice[slice.size - 1] << '\''; + return Status::InvalidArgument("Map does not end with '}' character, found '" + ss.str()); + } + // empty map + if (slice.size == 2) { + offsets.push_back(offsets.back()); + return Status::OK(); + } + + // remove '{' '}' + slice.remove_prefix(1); + slice.remove_suffix(1); + slice.trim_prefix(); + + // deserialize map column from text we have to know how to split from text and support nested + // complex type. + // 1. get item according to collection_delimiter, but if meet collection_delimiter in string, we should ignore it. + // 2. get kv according map_key_delimiter, but if meet map_key_delimiter in string, we should ignore it. + // 3. keep a nested level to support nested complex type. + int nested_level = 0; + bool has_quote = false; + int start_pos = 0; + size_t slice_size = slice.size; + bool key_added = false; + int idx = 0; + int elem_deserialized = 0; + for (; idx < slice_size; ++idx) { + char c = slice[idx]; + if (c == '"' || c == '\'') { + has_quote = !has_quote; + } else if (c == '\\' && idx + 1 < slice_size) { //escaped + ++idx; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == options.map_key_delim && !key_added) { + // if meet map_key_delimiter and not in quote, we can make it as key elem. 
+ if (idx == start_pos) { + continue; + } + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string && + (next.starts_with("\"") || next.starts_with("'"))) { + next.remove_prefix(1); + } + if (options.converted_from_string && (next.ends_with("\"") || next.ends_with("'"))) { + next.remove_suffix(1); + } + if (Status st = + key_serde->deserialize_one_cell_from_text(nested_key_column, next, options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized); + nested_val_column.pop_back(elem_deserialized); + return st; + } + // skip delimiter + start_pos = idx + 1; + key_added = true; + } else if (!has_quote && nested_level == 0 && c == options.collection_delim && key_added) { + // if meet collection_delimiter and not in quote, we can make it as value elem + if (idx == start_pos) { + continue; + } + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string) next.trim_quote(); + + if (Status st = value_serde->deserialize_one_cell_from_text(nested_val_column, next, + options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized + 1); + nested_val_column.pop_back(elem_deserialized); + return st; + } + // skip delimiter + start_pos = idx + 1; + // reset key_added + key_added = false; + ++elem_deserialized; + } + } + // for last value elem + if (!has_quote && nested_level == 0 && idx == slice_size && idx != start_pos && key_added) { + Slice next(slice.data + start_pos, idx - start_pos); + next.trim_prefix(); + if (options.converted_from_string) next.trim_quote(); + + if (Status st = + value_serde->deserialize_one_cell_from_text(nested_val_column, next, options); + !st.ok()) { + nested_key_column.pop_back(elem_deserialized + 1); + nested_val_column.pop_back(elem_deserialized); + return st; + } + ++elem_deserialized; + } + + offsets.emplace_back(offsets.back() + elem_deserialized); + return Status::OK(); +} + void 
DataTypeMapSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { auto blob = static_cast(arg); column.deserialize_and_insert_from_arena(blob->getBlob()); diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index 67474d0676b8f7..e90ba11f299436 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -39,12 +39,24 @@ class DataTypeMapSerDe : public DataTypeSerDe { DataTypeMapSerDe(const DataTypeSerDeSPtr& _key_serde, const DataTypeSerDeSPtr& _value_serde) : key_serde(_key_serde), value_serde(_value_serde) {} + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write map column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to map"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override; diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index d96a705defaf31..77d67e764a174e 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -38,6 +38,56 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeNullableSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void DataTypeNullableSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const auto& col_null = assert_cast(*ptr); + if (col_null.is_null_at(row_num)) { + bw.write("NULL", 4); + } else { + nested_serde->serialize_one_cell_to_text(col_null.get_nested_column(), row_num, bw, + options); + } +} + +Status DataTypeNullableSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeNullableSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& null_column = assert_cast(column); + // TODO(Amory) make null literal configurable + if (slice.size == 4 && slice[0] == 'N' && slice[1] == 'U' && slice[2] == 'L' && + slice[3] == 'L') { + null_column.insert_data(nullptr, 0); + return Status::OK(); + } + auto st = nested_serde->deserialize_one_cell_from_text(null_column.get_nested_column(), slice, + options); + if (!st.ok()) { + // fill null if fail + null_column.insert_data(nullptr, 0); // 0 is meaningless here + return Status::OK(); + } + // fill not null if success + null_column.get_null_map_data().push_back(0); + return Status::OK(); +} + Status 
DataTypeNullableSerDe::write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const { int row_count = end - start; diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 1c7dc1b8a55464..42e30e2d1bdc48 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -35,6 +35,17 @@ class DataTypeNullableSerDe : public DataTypeSerDe { public: DataTypeNullableSerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 081ad569c32854..8bb3473c07629a 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -97,6 +97,78 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const } } +template +Status DataTypeNumberSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = reinterpret_cast(column); + ReadBuffer rb(slice.data, slice.size); + if constexpr 
(std::is_same::value) { + // TODO: support for Uint128 + return Status::InvalidArgument("uint128 is not support"); + } else if constexpr (std::is_same_v || std::is_same_v) { + T val = 0; + if (!read_float_text_fast_impl(val, rb)) { + return Status::InvalidArgument("parse number fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + } else if constexpr (std::is_same_v) { + // Note: here we should handle the bool type + T val = 0; + if (!try_read_bool_text(val, rb)) { + return Status::InvalidArgument("parse boolean fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + } else if constexpr (std::is_integral::value) { + T val = 0; + if (!read_int_text_impl(val, rb)) { + return Status::InvalidArgument("parse number fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + column_data.insert_value(val); + } else { + DCHECK(false); + } + return Status::OK(); +} + +template +void DataTypeNumberSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +template +void DataTypeNumberSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + auto data = assert_cast&>(*ptr).get_element(row_num); + if constexpr (std::is_same::value) { + std::string hex = int128_to_string(data); + bw.write(hex.data(), hex.size()); + } else if constexpr (std::is_same_v) { + // fmt::format_to maybe get inaccurate results at float type, so we use gutil implement. 
+ char buf[MAX_FLOAT_STR_LENGTH + 2]; + int len = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, buf); + bw.write(buf, len); + } else if constexpr (std::is_integral::value || std::numeric_limits::is_iec559) { + bw.write_number(data); + } +} + +template +Status DataTypeNumberSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + template void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, @@ -176,4 +248,4 @@ template class DataTypeNumberSerDe; template class DataTypeNumberSerDe; template class DataTypeNumberSerDe; } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 2130d784935181..1e27ef03d12c28 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -54,6 +54,18 @@ class DataTypeNumberSerDe : public DataTypeSerDe { public: using ColumnType = ColumnVector; + + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git 
a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index ceb4a20cf2c8d6..3dddc06113dcbb 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -36,40 +36,71 @@ class Arena; class DataTypeObjectSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << "Not support write object column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to object"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& 
result, Arena* mem_pool, int32_t col_id, int row_num) const override { - LOG(FATAL) << "Not support write object column to json"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const override { - LOG(FATAL) << "Not support write json object to column"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write object column to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read object column from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override { - LOG(FATAL) << "Not support write object column to mysql"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } }; } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index ebeccca0970c43..4c9dae672beee0 100644 --- 
a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -40,6 +40,30 @@ namespace vectorized { template class DataTypeQuantileStateSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); + } + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; @@ -51,11 +75,13 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe { void write_column_to_arrow(const IColumn& column, const NullMap* null_map, arrow::ArrayBuilder* array_builder, int start, int end) const override { - LOG(FATAL) << "Not support write " << column.get_name() << " to arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, int end, const 
cctz::time_zone& ctz) const override { - LOG(FATAL) << "Not support read " << column.get_name() << " from arrow"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 4df22029c959c0..15d44bf692f142 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -39,6 +39,23 @@ namespace cctz { class time_zone; } // namespace cctz +#define SERIALIZE_COLUMN_TO_TEXT() \ + for (size_t i = start_idx; i < end_idx; ++i) { \ + if (i != start_idx) { \ + bw.write(options.field_delim.data(), options.field_delim.size()); \ + } \ + serialize_one_cell_to_text(column, i, bw, options); \ + } + +#define DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() \ + for (int i = 0; i < slices.size(); ++i) { \ + if (Status st = deserialize_one_cell_from_text(column, slices[i], options); \ + st != Status::OK()) { \ + return st; \ + } \ + ++*num_deserialized; \ + } + namespace doris { class PValues; class JsonbValue; @@ -59,9 +76,52 @@ class IDataType; // the developer does not know how many datatypes has to deal. class DataTypeSerDe { +public: + // Text serialization/deserialization of data types depend on some settings witch we define + // in formatOptions. + struct FormatOptions { + /** + * if true, we will use olap format which defined in src/olap/types.h, but we do not suggest + * use this format in olap, because it is more slower, keep this option is for compatibility. + */ + bool date_olap_format = false; + /** + * field delimiter is used to separate fields in one row + */ + std::string field_delim = ","; + /** + * collection_delim is used to separate elements in collection, such as array, map + */ + char collection_delim = ','; + /** + * map_key_delim is used to separate key and value in map , eg. 
key:value + */ + char map_key_delim = ':'; + /** + * used in deserialize with text format, if the element is packed in string using "" or '', but not string type, and this switch is open + * we can convert the string to the element type, such as int, float, double, date, datetime, timestamp, decimal + * by dropping the "" or ''. + */ + bool converted_from_string = false; + }; + public: DataTypeSerDe(); virtual ~DataTypeSerDe(); + // Text serializer and deserializer with formatOptions to handle different text format + virtual void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const = 0; + + // this function serialize multi-column to one row text to avoid virtual function call in complex type nested loop + virtual void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const = 0; + + virtual Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const = 0; + // deserialize text vector is to avoid virtual function call in complex type nested loop + virtual Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const = 0; // Protobuf serializer and deserializer virtual Status write_column_to_pb(const IColumn& column, PValues& result, int start, diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index bff2df4431bec4..9db727e9499729 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -34,6 +34,37 @@ namespace doris { namespace vectorized { class Arena; +void DataTypeStringSerDe::serialize_column_to_text(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options) const { + SERIALIZE_COLUMN_TO_TEXT() +} + +void 
DataTypeStringSerDe::serialize_one_cell_to_text(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const auto& value = assert_cast(*ptr).get_data_at(row_num); + bw.write(value.data, value.size); +} + +Status DataTypeStringSerDe::deserialize_column_from_text_vector( + IColumn& column, std::vector& slices, int* num_deserialized, + const FormatOptions& options) const { + DESERIALIZE_COLUMN_FROM_TEXT_VECTOR() + return Status::OK(); +} + +Status DataTypeStringSerDe::deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const { + auto& column_data = assert_cast(column); + column_data.insert_data(slice.data, slice.size); + return Status::OK(); +} + Status DataTypeStringSerDe::write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const { result.mutable_bytes_value()->Reserve(end - start); @@ -157,4 +188,4 @@ Status DataTypeStringSerDe::write_column_to_mysql(const IColumn& column, } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 85b60cb268d2e5..74822c8f95cc36 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -33,6 +33,19 @@ class Arena; class DataTypeStringSerDe : public DataTypeSerDe { public: + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override; + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override; + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override; + 
+ Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override; + Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index 927c7ac9edcd6f..33d14c74115dfb 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -39,12 +39,39 @@ class DataTypeStructSerDe : public DataTypeSerDe { DataTypeStructSerDe(const DataTypeSerDeSPtrs& _elemSerDeSPtrs) : elemSerDeSPtrs(_elemSerDeSPtrs) {} + void serialize_one_cell_to_text(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_one_cell_to_text with type " + column.get_name()); + } + + void serialize_column_to_text(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_column_to_text with type " + column.get_name()); + } + + Status deserialize_one_cell_from_text(IColumn& column, Slice& slice, + const FormatOptions& options) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_one_cell_from_text with type " + column.get_name()); + } + + Status deserialize_column_from_text_vector(IColumn& column, std::vector& slices, + int* num_deserialized, + const FormatOptions& options) const override { + throw doris::Exception( + ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_column_from_text_vector with type " + column.get_name()); + } Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override { - LOG(FATAL) << 
"Not support write struct column to pb"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "write_column_to_pb with type " + column.get_name()); } Status read_column_from_pb(IColumn& column, const PValues& arg) const override { - LOG(FATAL) << "Not support read from pb to strut"; + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "read_column_from_pb with type " + column.get_name()); } void write_one_cell_to_jsonb(const IColumn& column, JsonbWriter& result, Arena* mem_pool, int32_t col_id, int row_num) const override; diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp new file mode 100644 index 00000000000000..188d70daa94d7a --- /dev/null +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -0,0 +1,1260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gtest/gtest_pred_impl.h" +#include "olap/types.h" // for TypeInfo +#include "olap/wrapper_field.h" +#include "vec/columns/column.h" +#include "vec/common/string_buffer.hpp" +#include "vec/core/field.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_map.h" +#include "vec/data_types/serde/data_type_serde.h" +#include "vec/data_types/serde_utils.h" +#include "vec/io/reader_buffer.h" + +namespace doris::vectorized { +// This test aim to make sense for text serde of data types. +// we use default formatOption and special formatOption to equal serde for wrapperField. +TEST(TextSerde, ScalaDataTypeSerdeTextTest) { + // arithmetic scala field types + { + // fieldType, test_string, expect_string + typedef std::tuple, std::vector> FieldType_RandStr; + std::vector arithmetic_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, {"0", "1", "-1"}, + {"0", "1", ""}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_TINYINT, {"127", "-128", "-190"}, + {"127", "-128", ""}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_SMALLINT, {"32767", "32768", "-32769"}, + {"32767", "", ""}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_INT, + {"2147483647", "2147483648", "-2147483649"}, + {"2147483647", "", ""}), + // float ==> float32(32bit) + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT, + {"1.123", "3.40282e+38", "3.40282e+38+1"}, + {"1.123", "3.40282e+38", ""}), + // double ==> float64(64bit) + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"2343.12345465746", "2.22507e-308", "2.22507e-308-1"}, + {"2343.12345465746", "2.22507e-308", ""}), + // BIGINT ==> int64_t(64bit) + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_BIGINT, + {"9223372036854775807", "-9223372036854775808", "9223372036854775808"}, + {"9223372036854775807", "-9223372036854775808", ""}), + // LARGEINT ==> int128_t(128bit) + 
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_LARGEINT, + {"170141183460469231731687303715884105727", + "−170141183460469231731687303715884105728", + "170141183460469231731687303715884105728"}, + {"170141183460469231731687303715884105727", "", ""}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_CHAR, {"amory happy"}, + {"amory happy"}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_VARCHAR, {"doris be better"}, + {"doris be better"}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"}, + {"doris be better"}), + // decimal ==> decimalv2(decimal<128>(27,9)) + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL, + { + // (17, 9)(first 0 will ignore) + "012345678901234567.012345678", + // (18, 8) (automatically fill 0 for scala) + "123456789012345678.01234567", + // (17, 10) (rounding last to make it fit) + "12345678901234567.0123456779", + // (17, 11) (rounding last to make it fit) + "12345678901234567.01234567791", + // (19, 8) (wrong) + "1234567890123456789.01234567", + }, + {"12345678901234567.012345678", "123456789012345678.012345670", + "12345678901234567.012345678", "", ""}), + // decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1) + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {"1234567.12", "123456.123", "1234567.123", "12345679.1"}, + {"1234567.12", "123456.12", "1234567.12", ""}), + // decimal64 ==> decimal64(18,9) (9, 9) (3,2) (9, 10) (10, 9) + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {"123456789.123456789", "123.12", "123456789.0123456789", + "1234567890.123456789"}, + {"123456789.123456789", "123.120000000", "123456789.012345679", ""}), + // decimal128I ==> decimal128I(38,18) (19,18) + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL128I, + {"01234567890123456789.123456789123456789", + // (20,11) (automatically fill 0 for scala) + "12345678901234567890.12345678911", + // (19,18) + "1234567890123456789.123456789123456789", + // (19,19) (rounding last to make it fit) + 
"1234567890123456789.1234567890123456789", + // (18, 20) (rounding to make it fit) + "123456789012345678.01234567890123456789", + // (20, 19) (wrong) + "12345678901234567890.1234567890123456789"}, + {"1234567890123456789.123456789123456789", + "12345678901234567890.123456789110000000", + "1234567890123456789.123456789123456789", + "1234567890123456789.123456789012345679", + "123456789012345678.012345678901234568", + "12345678901234567890.123456789012345679"}), + + }; + + for (auto type_pair : arithmetic_scala_field_types) { + auto type = std::get<0>(type_pair); + DataTypePtr data_type_ptr; + if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL32) { + // decimal32(7, 2) + data_type_ptr = DataTypeFactory::instance().create_data_type(type, 9, 2); + } else if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL64) { + // decimal64(18, 9) + data_type_ptr = DataTypeFactory::instance().create_data_type(type, 18, 9); + } else if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I) { + // decimal128I(38,18) + data_type_ptr = DataTypeFactory::instance().create_data_type(type, 38, 18); + } else { + data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + } + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + auto col = data_type_ptr->create_column(); + + // serde for data types with default FormatOption + DataTypeSerDe::FormatOptions default_format_option; + DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); + + auto ser_col = ColumnString::create(); + ser_col->reserve(std::get<1>(type_pair).size()); + VectorBufferWriter buffer_writer(*ser_col.get()); + + for (int i = 0; i < std::get<1>(type_pair).size(); ++i) { + string test_str = std::get<1>(type_pair)[i]; + std::cout << "the str : " << test_str << std::endl; + Slice rb_test(test_str.data(), test_str.size()); + // deserialize + Status st = + serde->deserialize_one_cell_from_text(*col, rb_test, default_format_option); + if 
(std::get<2>(type_pair)[i].empty()) { + EXPECT_EQ(st.ok(), false); + std::cout << "deserialize failed: " << st.to_json() << std::endl; + continue; + } + EXPECT_EQ(st.ok(), true); + // serialize + serde->serialize_one_cell_to_text(*col, i, buffer_writer, default_format_option); + buffer_writer.commit(); + EXPECT_EQ(ser_col->get_data_at(ser_col->size() - 1).to_string(), + std::get<2>(type_pair)[i]); + } + } + } + + // date and datetime type + { + typedef std::pair FieldType_RandStr; + std::vector date_scala_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATE, "2020-01-01"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATE, "2020-01-01"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATEV2, "2020-01-01"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATETIME, "2020-01-01 12:00:00"), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + "2020-01-01 12:00:00.666666"), + }; + for (auto pair : date_scala_field_types) { + auto type = pair.first; + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type(type, 0, 0); + std::cout << "========= This type is " << data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + std::unique_ptr min_wf(WrapperField::create_by_type(type)); + std::unique_ptr max_wf(WrapperField::create_by_type(type)); + std::unique_ptr rand_wf(WrapperField::create_by_type(type)); + + min_wf->set_to_min(); + max_wf->set_to_max(); + rand_wf->from_string(pair.second, 0, 0); + + string min_s = min_wf->to_string(); + string max_s = max_wf->to_string(); + string rand_date = rand_wf->to_string(); + + Slice min_rb(min_s.data(), min_s.size()); + Slice max_rb(max_s.data(), max_s.size()); + Slice rand_rb(rand_date.data(), rand_date.size()); + + auto col = data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); + // make use c++ lib equals to wrapper field from_string behavior + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.date_olap_format = true; + + 
Status st = serde->deserialize_one_cell_from_text(*col, min_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_text(*col, max_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + st = serde->deserialize_one_cell_from_text(*col, rand_rb, formatOptions); + EXPECT_EQ(st.ok(), true); + + auto ser_col = ColumnString::create(); + ser_col->reserve(3); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + serde->serialize_one_cell_to_text(*col, 1, buffer_writer, formatOptions); + buffer_writer.commit(); + serde->serialize_one_cell_to_text(*col, 2, buffer_writer, formatOptions); + buffer_writer.commit(); + rtrim(min_s); + rtrim(max_s); + rtrim(rand_date); + StringRef min_s_d = ser_col->get_data_at(0); + StringRef max_s_d = ser_col->get_data_at(1); + StringRef rand_s_d = ser_col->get_data_at(2); + + std::cout << "min(" << min_s << ") with datat_ype_str:" << min_s_d << std::endl; + std::cout << "max(" << max_s << ") with datat_ype_str:" << max_s_d << std::endl; + std::cout << "rand(" << rand_date << ") with datat_type_str:" << rand_s_d << std::endl; + EXPECT_EQ(min_s, min_s_d.to_string()); + EXPECT_EQ(max_s, max_s_d.to_string()); + EXPECT_EQ(rand_date, rand_s_d.to_string()); + } + } + + // nullable data type with const column + { + DataTypePtr data_type_ptr = DataTypeFactory::instance().create_data_type( + FieldType::OLAP_FIELD_TYPE_STRING, 0, 0); + DataTypePtr nullable_ptr = std::make_shared(data_type_ptr); + std::unique_ptr rand_wf( + WrapperField::create_by_type(FieldType::OLAP_FIELD_TYPE_STRING)); + std::string test_str = generate(128); + rand_wf->from_string(test_str, 0, 0); + Field string_field(test_str); + ColumnPtr col = nullable_ptr->create_column_const(0, string_field); + DataTypeSerDe::FormatOptions default_format_option; + DataTypeSerDeSPtr serde = nullable_ptr->get_serde(); + auto ser_col = ColumnString::create(); + 
ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, default_format_option); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string()); + } +} + +// test for array and map +TEST(TextSerde, ComplexTypeSerdeTextTest) { + // array-scala + { + // nested type,test string, expect string(option.converted_from_string=false),expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, + {"[0, 1,-1,1]", "[true, false]", "[,]", "[1,true,t]", + "[1, false], [,], [1,true,t]"}, + {"[0, 1, NULL, 1]", "[1, 0]", "[NULL, NULL]", "[1, 1, NULL]", + "[1, NULL, NULL, 1, NULL]"}, + {"[0, 1, NULL, 1]", "[1, 0]", "[NULL, NULL]", "[1, 1, NULL]", + "[1, NULL, NULL, 1, NULL]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_TINYINT, + {"[1111, 12, ]", "[,1 , 3]", "[ed, 2,]", "[],[]", "[[]]"}, + {"[NULL, 12, NULL]", "[NULL, 1, 3]", "[NULL, 2, NULL]", "[NULL]", "[NULL]"}, + {"[NULL, 12, NULL]", "[NULL, 1, 3]", "[NULL, 2, NULL]", "[NULL]", + "[NULL]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_FLOAT, + {"[0.33, 0.67, 0]", "[3.40282e+38, 3.40282e+38+1]", "[\"3.40282e+38+1\"]", + "[\"3.14\", 0.77]"}, + {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[NULL, 0.77]"}, + {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[3.14, 0.77]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, 2.22507e-308-1, \"2.22507e-308\"]"}, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, NULL, NULL]"}, + {"[3.1415926, 0.878787878, 12.44456475432]", + "[2343.12345465746, 2.22507e-308, NULL, 2.22507e-308]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, + {"[\"hello\", 
\"world\"]", "['a', 'b', 'c']", + "[\"42\",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:" + "Control#86:Bob#31,17:true:Abe " + "Linkedin,BLUE,\"\\N\",\"\u0001\u0002\u0003,\\u0001bc\"]"}, + // last : ["42",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,"\N",",\u0001bc"] + {"[\"hello\", \"world\"]", "['a', 'b', 'c']", + "[\"42\", 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " + "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \"\\N\", " + "\"\x1\x2\x3,\\u0001bc\"]"}, + {"[hello, world]", "[a, b, c]", + "[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " + "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, " + "\x1\x2\x3,\\u0001bc]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, + {"[\\\"2022-07-13\\\",\"2022-07-13 12:30:00\"]", + "[2022-07-13 12:30:00, \"2022-07-13\"]", + "[2022-07-13 12:30:00.000, 2022-07-13]"}, + {"[NULL, NULL]", "[2022-07-13, NULL]", "[2022-07-13, 2022-07-13]"}, + {"[NULL, 2022-07-13]", "[2022-07-13, 2022-07-13]", + "[2022-07-13, 2022-07-13]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, + {"[\"2022-07-13\",\"2022-07-13 12:30:00\"]", + "[2022-07-13 12:30:00, \"2022-07-13\", 2022-07-13 12:30:00.0000]"}, + {"[NULL, NULL]", "[2022-07-13 12:30:00, NULL, 2022-07-13 12:30:00]"}, + {"[2022-07-13 00:00:00, 2022-07-13 12:30:00]", + "[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"[4, 5.5, 6.67]", + "[012345678901234567.012345678,123456789012345678.01234567, " + "12345678901234567.0123456779,12345678901234567.01234567791," + "1234567890123456789.01234567]", + "[\"012345678901234567.012345678\",\"123456789012345678.01234567\", " + "\"12345678901234567.0123456779\", " + "\"12345678901234567.01234567791\",\"1234567890123456789.01234567\"]", + "[\\1234567890123456789.01234567\\]"}, + {"[4.000000000, 5.500000000, 6.670000000]", + 
"[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[NULL, NULL, NULL, NULL, NULL]", "[NULL]"}, + {"[4.000000000, 5.500000000, 6.670000000]", + "[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[12345678901234567.012345678, 123456789012345678.012345670, " + "12345678901234567.012345678, NULL, NULL]", + "[NULL]"}), + }; + // array type + for (auto type_pair : nested_field_types) { + auto type = std::get<0>(type_pair); + DataTypePtr nested_data_type_ptr = + DataTypeFactory::instance().create_data_type(type, 0, 0); + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + auto col = array_data_type_ptr->create_column(); + auto col2 = array_data_type_ptr->create_column(); + auto col3 = array_data_type_ptr->create_column(); + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<1>(type_pair).size(); ++i) { + std::string rand_str = std::get<1>(type_pair)[i]; + std::string expect_str = std::get<2>(type_pair)[i]; + std::string expect_str_1 = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "[]") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + 
VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + Status status = array_data_type_ptr->from_string(rb, col2); + EXPECT_EQ(status.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "[]") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, i, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-scala + { + // nested key type , nested value type, test string , expect string + typedef std::tuple, std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, + FieldType::OLAP_FIELD_TYPE_STRING, + {"{1: \"amory is 7\", 0: \" doris be better \", -1: \"wrong,\"}", + "{\"1\": 
\"amory is 7\", \"0\": 1}"}, + {"{1:\"amory is 7\", 0:\" doris be better \", NULL:\"wrong,\"}", + "{NULL:\"amory is 7\", NULL:1}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , NULL: 12.2222222, " + ": NULL\\}", + "{\"\": NULL, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}"}, + {"{\" ,.amory\":111.2343, \"\":112, 'dggs':13.14, NULL:12.2222222, :NULL}", + "{\"\":NULL, null:12.44}", "{}", "{}", "", "{}"}), + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT, + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{0.33: 3.1415926,3.1415926: 22}", "{3.14, 15926: 22}", "{3.14}", + "{222:3444},", "{4.12, 677: 455: 356, 67.6:67.7}"}, + {"{0.33:3.1415926, 3.1415925:22}", "{NULL:22}", "{}", "", + "{NULL:NULL, 67.6:67.7}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, + {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "2022-07-13:'2022-07-13 12:30:00'}", + // escaped char ':' + "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " + "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"}, + {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " + "NULL:NULL, 2022-07-13:NULL}", + "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " + "2022-07-13:NULL}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}", + "{\"2022-07-13 12:30:00\": \"12.45675432\"}", + "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", + "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, + {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " + "NULL:NULL}", + "{NULL:NULL}", + "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", 
+ "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + }; + + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + auto value_type = std::get<1>(type_pair); + DataTypePtr nested_key_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + DataTypePtr nested_value_type_ptr = + DataTypeFactory::instance().create_data_type(value_type, 0, 0); + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_type_ptr), make_nullable(nested_value_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + auto col2 = map_data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + { + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + std::cout << st.to_json() << std::endl; + if (expect_str.empty()) { + EXPECT_FALSE(st.ok()); + continue; + } + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + // from_string + { + ReadBuffer rb(rand_str.data(), rand_str.size()); + std::cout << "from string rb: " << rb.to_string() << std::endl; + Status stat = map_data_type_ptr->from_string(rb, col2); + std::cout << stat.to_json() << std::endl; + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter 
buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, col2->size() - 1, buffer_writer, + formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d.to_string() << std::endl; + } + } + } + + // option with converted_with_string true + typedef std::tuple, std::vector> + FieldType_RandStr; + std::vector field_types = { + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, + {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "2022-07-13:'2022-07-13 12:30:00'}", + // escaped char ':' + "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " + "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"}, + {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " + "NULL:NULL, 2022-07-13:2022-07-13 12:30:00}", + "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " + "2022-07-13:2022-07-13 12:30:00}"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}", + "{\"2022-07-13 12:30:00\": \"12.45675432\"}", + "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", + "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, + {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " + "NULL:NULL}", + "{2022-07-13 12:30:00:12.456754320}", + "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", + "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + }; + for (auto type_pair : field_types) { + auto key_type = std::get<0>(type_pair); + auto value_type = std::get<1>(type_pair); + DataTypePtr nested_key_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + DataTypePtr nested_value_type_ptr = + 
DataTypeFactory::instance().create_data_type(value_type, 0, 0); + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_type_ptr), make_nullable(nested_value_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + auto col2 = map_data_type_ptr->create_column(); + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + formatOptions.converted_from_string = true; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + { + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + std::cout << st.to_json() << std::endl; + if (expect_str.empty()) { + EXPECT_FALSE(st.ok()); + continue; + } + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + } + } +} + +TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { + // array-array + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, std::vector, + std::vector> + FieldType_RandStr; + std::vector nested_field_types = { + FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, + {"[[Hello, World],[This, is, a, nested, array]]"}, + {"[[Hello, World], [This, is, a, nested, array]]"}, + {"[NULL, NULL, NULL, NULL, NULL, NULL, NULL]"}, + {"[[Hello, World], [This, is, a, nested, 
array]]"}), + FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, + {"[[With, special, \"characters\"], [like, @, #, $, % \"^\", &, *, (, ), " + "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"}, + {"[[With, special, \"characters\"], [like, @, #, $, % \"^\", &, *, (, ), " + "-, _], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"}, + {""}, + {"[[With, special, characters], [like, @, #, $, % \"^\", &, *, (, ), -, " + "_], [=, +, [, ], {, }, |, \\, ;, :, ', '\', <, >, ,, ., /, ?, ~]]"})}; + // array type + for (auto type_pair : nested_field_types) { + auto type = std::get<0>(type_pair); + DataTypePtr nested_data_type_ptr = + DataTypeFactory::instance().create_data_type(type, 0, 0); + DataTypePtr nested_array_data_type_ptr = + std::make_shared(make_nullable(nested_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_array_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << ": " + << fmt::format("{}", type) << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<1>(type_pair).size(); ++i) { + std::string rand_str = std::get<1>(type_pair)[i]; + std::string expect_str = std::get<2>(type_pair)[i]; + std::string expect_from_string_str = std::get<3>(type_pair)[i]; + std::string expect_str_1 = std::get<4>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = 
serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); 
+ buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // array-map + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"[{\"2cKtIM-L1mOcEm-udR-HcB2\":0.23929040957798242,\"eof2UN-Is0EEuA-H5D-hE58\":0." + "42373055809540094,\"FwUSOB-R8rtK9W-BVG-8wYZ\":0.7680704548628841},{\"qDXU9D-" + "7orr51d-g80-6t5k\":0.6446245786874659,\"bkLjmx-uZ2Ez7F-536-PGqy\":0." + "8880791950937957,\"9Etq4o-FPm37O4-5fk-QWh7\":0.08630489716260481},{\"tu3OMw-" + "mzS0jAx-Dnj-Xm3G\":0.1184199213706042,\"XkhTn0-QFLo8Ks-JXR-k4zk\":0." + "5181239375482816,\"EYC8Dj-GTTp9iB-b4O-QBkO\":0.4491897722178303},{\"sHFGPg-" + "cfA8gya-kfw-IugT\":0.20842299487398452,\"BBQ6e5-OJYRJhC-zki-7rQj\":0." + "3050124830713523,\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754},{\"HfhEMX-" + "oAMBJCC-YIC-hCqN\":0.8131454631693608,\"xrnTFd-ikONWik-T7J-sL8J\":0." 
+ "37509722558990855,\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086}]"}, + {"[{\"2cKtIM-L1mOcEm-udR-HcB2\":0.23929040957798242, " + "\"eof2UN-Is0EEuA-H5D-hE58\":0.42373055809540094, " + "\"FwUSOB-R8rtK9W-BVG-8wYZ\":0.7680704548628841}, " + "{\"qDXU9D-7orr51d-g80-6t5k\":0.6446245786874659, " + "\"bkLjmx-uZ2Ez7F-536-PGqy\":0.8880791950937957, " + "\"9Etq4o-FPm37O4-5fk-QWh7\":0.08630489716260481}, " + "{\"tu3OMw-mzS0jAx-Dnj-Xm3G\":0.1184199213706042, " + "\"XkhTn0-QFLo8Ks-JXR-k4zk\":0.5181239375482816, " + "\"EYC8Dj-GTTp9iB-b4O-QBkO\":0.4491897722178303}, " + "{\"sHFGPg-cfA8gya-kfw-IugT\":0.20842299487398452, " + "\"BBQ6e5-OJYRJhC-zki-7rQj\":0.3050124830713523, " + "\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754}, " + "{\"HfhEMX-oAMBJCC-YIC-hCqN\":0.8131454631693608, " + "\"xrnTFd-ikONWik-T7J-sL8J\":0.37509722558990855, " + "\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086}]"}, + {""}, + {"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, " + "eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, " + "FwUSOB-R8rtK9W-BVG-8wYZ:0.7680704548628841}, " + "{qDXU9D-7orr51d-g80-6t5k:0.6446245786874659, " + "bkLjmx-uZ2Ez7F-536-PGqy:0.8880791950937957, " + "9Etq4o-FPm37O4-5fk-QWh7:0.08630489716260481}, " + "{tu3OMw-mzS0jAx-Dnj-Xm3G:0.1184199213706042, " + "XkhTn0-QFLo8Ks-JXR-k4zk:0.5181239375482816, " + "EYC8Dj-GTTp9iB-b4O-QBkO:0.4491897722178303}, " + "{sHFGPg-cfA8gya-kfw-IugT:0.20842299487398452, " + "BBQ6e5-OJYRJhC-zki-7rQj:0.3050124830713523, " + "mKH57V-YmwCNFq-vs8-vUIX:0.36446683035480754}, " + "{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, " + "xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, " + "SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086}]"})}; + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + + 
DataTypePtr nested_map_data_type_ptr = + std::make_shared(make_nullable(nested_key_data_type_ptr), + make_nullable(nested_value_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_map_data_type_ptr)); + + std::cout << "========= This type is " << array_data_type_ptr->get_name() << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, 
col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-array (map>) + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + // map> + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\"5Srn6n-SP9fOS3-khz-Ljwt\":[0.8537551959339321,0.13473869413865858,0." + "9806016478238296,0.23014415892941564,0.26853530959759686,0.05484935641143551,0." 
+ "11181328816302816,0.26510985318905933,0.6350885463275475,0.18209889263574142]," + "\"vrQmBC-2WlpWML-V5S-OLgM\":[0.6982221340596457,0.9260447299229463,0." + "12488042737255534,0.8859407191137862,0.03201490973378984,0.8371916387557367,0." + "7894434066323907,0.29667576138232743,0.9837777568426148,0.7773721913552772]," + "\"3ZbiXK-VvmhFcg-09V-w3g3\":[0.20509046053951785,0.9175575704931109,0." + "305788438361256,0.9923240410251069,0.6612939841907548,0.5922056063112593,0." + "15750800821536715,0.6374743124669565,0.4158097731627699,0.00302193321816846]," + "\"gMswpS-Ele9wHM-Uxp-VxzC\":[0.14378032144751685,0.627919779177473,0." + "6188731271454715,0.8088384184584442,0.8169160298605824,0.9051151670055427,0." + "558001941204895,0.029409463113641787,0.9532987674717762,0.20833228278241533]," + "\"TT9P9f-PXjQnvN-RBx-xRiS\":[0.8276005878909756,0.470950932860423,0." + "2442851528127543,0.710599416715854,0.3353731152359334,0.622947602340124,0." + "30675353671676797,0.8190741661938367,0.633630372770242,0.9436322366112492]," + "\"gLAnZc-oF7PC9o-ryd-MOXr\":[0.9742716809818137,0.9114038616933997,0." + "47459239268645104,0.6054569900795078,0.5515590901916287,0.8833310208917589,0." + "96476090778518,0.8873874315592357,0.3577701257062156,0.6993447306713452]," + "\"zrq6BY-7FJg3hc-Dd1-bAJn\":[0.1038405592062176,0.6757819253774818,0." + "6386535502499314,0.23598674876945303,0.11046582465777044,0.6426056925348297,0." + "17289073092250662,0.37116009951425233,0.594677969672274,0.49351456402872274]," + "\"gCKqtW-bLaoxgZ-CuW-M2re\":[0.934169137905867,0.12015121444469123,0." + "5009923777544698,0.4689139716802634,0.7226298925299507,0.33486164698864984,0." + "32944768657449996,0.5051366150918063,0.03228636228382431,0.48211773870118435]," + "\"SWqhI2-XnF9jVR-dT1-Yrtt\":[0.8005897112110444,0.899180582368993,0." + "9232176819588501,0.8615673086606942,0.9248122266449379,0.5586489299212893,0." 
+ "40494513773898455,0.4752644689010731,0.6668395567417462,0.9068738374244337]," + "\"Z85F6M-cy5K4GP-7I5-5KS9\":[0.34761241187833714,0.46467162849990507,0." + "009781307454025168,0.3174295126364216,0.6405423361175397,0.33838144910731327,0." + "328860321648657,0.032638966917555856,0.32782524002924884,0.7675689545937956]," + "\"rlcnbo-tFg1FfP-ra6-D9Z8\":[0.7450713997349928,0.792502852203968,0." + "9034039182796755,0.49131654565079996,0.25223293077647946,0.9827253462450637,0." + "1684868582627418,0.0417161505112974,0.8498128570850716,0.8948779001812955]}"}, + {"{\"5Srn6n-SP9fOS3-khz-Ljwt\":[0.8537551959339321, 0.13473869413865858, " + "0.9806016478238296, 0.23014415892941564, 0.26853530959759686, " + "0.05484935641143551, 0.11181328816302816, 0.26510985318905933, " + "0.6350885463275475, 0.18209889263574142], " + "\"vrQmBC-2WlpWML-V5S-OLgM\":[0.6982221340596457, 0.9260447299229463, " + "0.12488042737255534, 0.8859407191137862, 0.03201490973378984, " + "0.8371916387557367, 0.7894434066323907, 0.29667576138232743, 0.9837777568426148, " + "0.7773721913552772], \"3ZbiXK-VvmhFcg-09V-w3g3\":[0.20509046053951785, " + "0.9175575704931109, 0.305788438361256, 0.9923240410251069, 0.6612939841907548, " + "0.5922056063112593, 0.15750800821536715, 0.6374743124669565, 0.4158097731627699, " + "0.00302193321816846], \"gMswpS-Ele9wHM-Uxp-VxzC\":[0.14378032144751685, " + "0.627919779177473, 0.6188731271454715, 0.8088384184584442, 0.8169160298605824, " + "0.9051151670055427, 0.558001941204895, 0.029409463113641787, 0.9532987674717762, " + "0.20833228278241533], \"TT9P9f-PXjQnvN-RBx-xRiS\":[0.8276005878909756, " + "0.470950932860423, 0.2442851528127543, 0.710599416715854, 0.3353731152359334, " + "0.622947602340124, 0.30675353671676797, 0.8190741661938367, 0.633630372770242, " + "0.9436322366112492], \"gLAnZc-oF7PC9o-ryd-MOXr\":[0.9742716809818137, " + "0.9114038616933997, 0.47459239268645104, 0.6054569900795078, 0.5515590901916287, " + "0.8833310208917589, 0.96476090778518, 
0.8873874315592357, 0.3577701257062156, " + "0.6993447306713452], \"zrq6BY-7FJg3hc-Dd1-bAJn\":[0.1038405592062176, " + "0.6757819253774818, 0.6386535502499314, 0.23598674876945303, " + "0.11046582465777044, 0.6426056925348297, 0.17289073092250662, " + "0.37116009951425233, 0.594677969672274, 0.49351456402872274], " + "\"gCKqtW-bLaoxgZ-CuW-M2re\":[0.934169137905867, 0.12015121444469123, " + "0.5009923777544698, 0.4689139716802634, 0.7226298925299507, 0.33486164698864984, " + "0.32944768657449996, 0.5051366150918063, 0.03228636228382431, " + "0.48211773870118435], \"SWqhI2-XnF9jVR-dT1-Yrtt\":[0.8005897112110444, " + "0.899180582368993, 0.9232176819588501, 0.8615673086606942, 0.9248122266449379, " + "0.5586489299212893, 0.40494513773898455, 0.4752644689010731, 0.6668395567417462, " + "0.9068738374244337], \"Z85F6M-cy5K4GP-7I5-5KS9\":[0.34761241187833714, " + "0.46467162849990507, 0.009781307454025168, 0.3174295126364216, " + "0.6405423361175397, 0.33838144910731327, 0.328860321648657, " + "0.032638966917555856, 0.32782524002924884, 0.7675689545937956], " + "\"rlcnbo-tFg1FfP-ra6-D9Z8\":[0.7450713997349928, 0.792502852203968, " + "0.9034039182796755, 0.49131654565079996, 0.25223293077647946, " + "0.9827253462450637, 0.1684868582627418, 0.0417161505112974, 0.8498128570850716, " + "0.8948779001812955]}"}, + {""}, + {"{5Srn6n-SP9fOS3-khz-Ljwt:[0.8537551959339321, 0.13473869413865858, " + "0.9806016478238296, 0.23014415892941564, 0.26853530959759686, " + "0.05484935641143551, 0.11181328816302816, 0.26510985318905933, " + "0.6350885463275475, 0.18209889263574142], " + "vrQmBC-2WlpWML-V5S-OLgM:[0.6982221340596457, 0.9260447299229463, " + "0.12488042737255534, 0.8859407191137862, 0.03201490973378984, " + "0.8371916387557367, 0.7894434066323907, 0.29667576138232743, 0.9837777568426148, " + "0.7773721913552772], 3ZbiXK-VvmhFcg-09V-w3g3:[0.20509046053951785, " + "0.9175575704931109, 0.305788438361256, 0.9923240410251069, 0.6612939841907548, " + "0.5922056063112593, 
0.15750800821536715, 0.6374743124669565, 0.4158097731627699, " + "0.00302193321816846], gMswpS-Ele9wHM-Uxp-VxzC:[0.14378032144751685, " + "0.627919779177473, 0.6188731271454715, 0.8088384184584442, 0.8169160298605824, " + "0.9051151670055427, 0.558001941204895, 0.029409463113641787, 0.9532987674717762, " + "0.20833228278241533], TT9P9f-PXjQnvN-RBx-xRiS:[0.8276005878909756, " + "0.470950932860423, 0.2442851528127543, 0.710599416715854, 0.3353731152359334, " + "0.622947602340124, 0.30675353671676797, 0.8190741661938367, 0.633630372770242, " + "0.9436322366112492], gLAnZc-oF7PC9o-ryd-MOXr:[0.9742716809818137, " + "0.9114038616933997, 0.47459239268645104, 0.6054569900795078, 0.5515590901916287, " + "0.8833310208917589, 0.96476090778518, 0.8873874315592357, 0.3577701257062156, " + "0.6993447306713452], zrq6BY-7FJg3hc-Dd1-bAJn:[0.1038405592062176, " + "0.6757819253774818, 0.6386535502499314, 0.23598674876945303, " + "0.11046582465777044, 0.6426056925348297, 0.17289073092250662, " + "0.37116009951425233, 0.594677969672274, 0.49351456402872274], " + "gCKqtW-bLaoxgZ-CuW-M2re:[0.934169137905867, 0.12015121444469123, " + "0.5009923777544698, 0.4689139716802634, 0.7226298925299507, 0.33486164698864984, " + "0.32944768657449996, 0.5051366150918063, 0.03228636228382431, " + "0.48211773870118435], SWqhI2-XnF9jVR-dT1-Yrtt:[0.8005897112110444, " + "0.899180582368993, 0.9232176819588501, 0.8615673086606942, 0.9248122266449379, " + "0.5586489299212893, 0.40494513773898455, 0.4752644689010731, 0.6668395567417462, " + "0.9068738374244337], Z85F6M-cy5K4GP-7I5-5KS9:[0.34761241187833714, " + "0.46467162849990507, 0.009781307454025168, 0.3174295126364216, " + "0.6405423361175397, 0.33838144910731327, 0.328860321648657, " + "0.032638966917555856, 0.32782524002924884, 0.7675689545937956], " + "rlcnbo-tFg1FfP-ra6-D9Z8:[0.7450713997349928, 0.792502852203968, " + "0.9034039182796755, 0.49131654565079996, 0.25223293077647946, " + "0.9827253462450637, 0.1684868582627418, 0.0417161505112974, 
0.8498128570850716, " + "0.8948779001812955]}"})}; + for (auto type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(nested_value_data_type_ptr)); + + DataTypePtr map_data_type_ptr = std::make_shared( + make_nullable(nested_key_data_type_ptr), make_nullable(array_data_type_ptr)); + + std::cout << "========= This type is " << map_data_type_ptr->get_name() << std::endl; + + DataTypeSerDeSPtr serde = map_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = map_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + std::cout << "test : " << st.to_json() << std::endl; + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + 
serde->serialize_one_cell_to_text(*col, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = map_data_type_ptr->create_column(); + Status status = map_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = map_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } + + // map-scala-map (map>) + { + // nested type,test string, expect string(option.converted_from_string=false), expect_from_string, expect string(option.converted_from_string=true) + 
typedef std::tuple, std::vector, + std::vector, std::vector> + FieldType_RandStr; + std::vector nested_field_types = {FieldType_RandStr( + FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, + {"{\"5H6iPe-CRvVE5Q-QnG-8WQb\":{},\"stDa6g-GML89aZ-w5u-LBe0\":{\"Vlekcq-LDCMo6f-" + "J7U-6rwB\":0.15375824233866453,\"4ljyNE-JMK1bSp-c05-EajL\":0.36153399717116075}," + "\"URvXyY-SMttaG4-Zol-mPak\":{\"xVaeqR-cj8I6EM-3Nt-queD\":0.003968938824538082," + "\"Vt2mSs-wacYDvl-qUi-B7kI\":0.6900852274982441,\"i3cJJh-oskdqti-KGU-U6gC\":0." + "40773692843073994},\"N3R9TI-jtBPGOQ-uRc-aWAD\":{\"xmGI09-FaCFrrR-O5J-29eu\":0." + "7166939407858642,\"fbxIwJ-HLvW94X-tPn-JgKT\":0.05904881148976504,\"ylE7y1-" + "wI3UhjR-ecQ-bNfo\":0.9293354174058581,\"zA0pEV-Lm8g4wq-NJc-TDou\":0." + "4000067127237942}}"}, + {"{\"5H6iPe-CRvVE5Q-QnG-8WQb\":{}, " + "\"stDa6g-GML89aZ-w5u-LBe0\":{\"Vlekcq-LDCMo6f-J7U-6rwB\":0.15375824233866453, " + "\"4ljyNE-JMK1bSp-c05-EajL\":0.36153399717116075}, " + "\"URvXyY-SMttaG4-Zol-mPak\":{\"xVaeqR-cj8I6EM-3Nt-queD\":0.003968938824538082, " + "\"Vt2mSs-wacYDvl-qUi-B7kI\":0.6900852274982441, " + "\"i3cJJh-oskdqti-KGU-U6gC\":0.40773692843073994}, " + "\"N3R9TI-jtBPGOQ-uRc-aWAD\":{\"xmGI09-FaCFrrR-O5J-29eu\":0.7166939407858642, " + "\"fbxIwJ-HLvW94X-tPn-JgKT\":0.05904881148976504, " + "\"ylE7y1-wI3UhjR-ecQ-bNfo\":0.9293354174058581, " + "\"zA0pEV-Lm8g4wq-NJc-TDou\":0.4000067127237942}}"}, + {""}, + {"{5H6iPe-CRvVE5Q-QnG-8WQb:{}, " + "stDa6g-GML89aZ-w5u-LBe0:{Vlekcq-LDCMo6f-J7U-6rwB:0.15375824233866453, " + "4ljyNE-JMK1bSp-c05-EajL:0.36153399717116075}, " + "URvXyY-SMttaG4-Zol-mPak:{xVaeqR-cj8I6EM-3Nt-queD:0.003968938824538082, " + "Vt2mSs-wacYDvl-qUi-B7kI:0.6900852274982441, " + "i3cJJh-oskdqti-KGU-U6gC:0.40773692843073994}, " + "N3R9TI-jtBPGOQ-uRc-aWAD:{xmGI09-FaCFrrR-O5J-29eu:0.7166939407858642, " + "fbxIwJ-HLvW94X-tPn-JgKT:0.05904881148976504, " + "ylE7y1-wI3UhjR-ecQ-bNfo:0.9293354174058581, " + "zA0pEV-Lm8g4wq-NJc-TDou:0.4000067127237942}}"})}; + for (auto 
type_pair : nested_field_types) { + auto key_type = std::get<0>(type_pair); + DataTypePtr nested_key_data_type_ptr = + DataTypeFactory::instance().create_data_type(key_type, 0, 0); + auto val_type = std::get<1>(type_pair); + DataTypePtr nested_value_data_type_ptr = + DataTypeFactory::instance().create_data_type(val_type, 0, 0); + + DataTypePtr nested_map_data_type_ptr = + std::make_shared(make_nullable(nested_key_data_type_ptr), + make_nullable(nested_value_data_type_ptr)); + + DataTypePtr array_data_type_ptr = + std::make_shared(make_nullable(std::make_shared()), + make_nullable(nested_map_data_type_ptr)); + + std::cout << " ========= ========= This type is " << array_data_type_ptr->get_name() + << std::endl; + + DataTypeSerDeSPtr serde = array_data_type_ptr->get_serde(); + DataTypeSerDeSPtr serde_1 = array_data_type_ptr->get_serde(); + DataTypeSerDe::FormatOptions formatOptions; + + for (int i = 0; i < std::get<2>(type_pair).size(); ++i) { + std::string rand_str = std::get<2>(type_pair)[i]; + std::string expect_str = std::get<3>(type_pair)[i]; + std::string expect_from_string_str = std::get<4>(type_pair)[i]; + std::string expect_str_1 = std::get<5>(type_pair)[i]; + std::cout << "rand_str:" << rand_str << std::endl; + std::cout << "expect_str:" << expect_str << std::endl; + std::cout << "expect_from_str:" << expect_from_string_str << std::endl; + std::cout << "expect_str_can_format_from_string:" << expect_str << std::endl; + { + // serde + auto col = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + formatOptions.converted_from_string = false; + Status st = serde->deserialize_one_cell_from_text(*col, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col, 0, 
buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test : " << rand_s_d << std::endl; + EXPECT_EQ(expect_str, rand_s_d.to_string()); + } + } + { + // from_string + ReadBuffer rb(rand_str.data(), rand_str.size()); + auto col2 = array_data_type_ptr->create_column(); + Status status = array_data_type_ptr->from_string(rb, col2); + if (expect_from_string_str == "") { + EXPECT_EQ(status.ok(), false); + std::cout << "test from_string: " << status.to_json() << std::endl; + } else { + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde->serialize_one_cell_to_text(*col2, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + std::cout << "test from string: " << rand_s_d << std::endl; + EXPECT_EQ(expect_from_string_str, rand_s_d.to_string()); + } + } + { + formatOptions.converted_from_string = true; + std::cout << "======== change " << formatOptions.converted_from_string + << " with rand_str: " << rand_str << std::endl; + auto col3 = array_data_type_ptr->create_column(); + Slice slice(rand_str.data(), rand_str.size()); + Status st = + serde_1->deserialize_one_cell_from_text(*col3, slice, formatOptions); + if (expect_str == "") { + EXPECT_EQ(st.ok(), false); + std::cout << st.to_json() << std::endl; + } else { + EXPECT_EQ(st.ok(), true); + auto ser_col = ColumnString::create(); + ser_col->reserve(1); + VectorBufferWriter buffer_writer(*ser_col.get()); + serde_1->serialize_one_cell_to_text(*col3, 0, buffer_writer, formatOptions); + buffer_writer.commit(); + StringRef rand_s_d = ser_col->get_data_at(0); + EXPECT_EQ(expect_str_1, rand_s_d.to_string()); + } + } + } + } + } +} +
+// Splits the text `["hello", "world"]` into one Slice per top-level element using only
+// Slice::remove_prefix, Slice::remove_suffix and Slice::trim_prefix, and checks the result.
+// A ',' is treated as an element delimiter only when it is outside quotes ('"' or '\'')
+// and outside any nested '[...]' / '{...}'.
+TEST(TextSerde, test_slice) {
+    // Drop the enclosing '[' and ']' so that only the element list remains.
+    Slice slice("[\"hello\", \"world\"]");
+    slice.remove_prefix(1);
+    slice.remove_suffix(1);
+
+    // Every entry is a view into `slice`; the last entry always extends to the end of
+    // `slice` until a later delimiter cuts it short.
+    std::vector<Slice> slices;
+    slices.emplace_back(slice);
+    bool has_quote = false;
+    int nested_level = 0;
+
+    // `slice.size` is unsigned, so the index is unsigned too.
+    for (size_t idx = 0; idx < slice.size; ++idx) {
+        char c = slice[idx];
+        std::cout << "c:" << c << " " << fmt::format("{}, {}", c == '[', c == ']') << std::endl;
+        if (c == '"' || c == '\'') {
+            has_quote = !has_quote;
+        } else if (!has_quote && (c == '[' || c == '{')) {
+            ++nested_level;
+        } else if (!has_quote && (c == ']' || c == '}')) {
+            --nested_level;
+        } else if (!has_quote && nested_level == 0 && c == ',') {
+            // The current item ends just before this delimiter. The last slice still runs
+            // to the end of `slice`, so cutting `slice.size - idx` bytes ends it at `idx`.
+            slices.back().remove_suffix(slice.size - idx);
+            // The next item starts right after the delimiter and, for now, covers the rest
+            // of the text. No bytes are copied; only the view bounds change.
+            Slice next(slice.data + idx + 1, slice.size - idx - 1);
+            std::cout << "back: " << slices.back().to_string() << std::endl;
+            std::cout << "insert: " << next.to_string() << std::endl;
+            // Skip the spaces that follow the delimiter.
+            next.trim_prefix();
+            slices.emplace_back(next);
+        }
+    }
+
+    std::cout << "nested level: " << nested_level << ", slices size: " << slices.size()
+              << std::endl;
+    for (auto s : slices) {
+        std::cout << s.to_string() << std::endl;
+    }
+
+    // Brackets are balanced and both quoted elements come out intact (quotes included).
+    EXPECT_EQ(0, nested_level);
+    ASSERT_EQ(2U, slices.size());
+    EXPECT_EQ("\"hello\"", slices[0].to_string());
+    EXPECT_EQ("\"world\"", slices[1].to_string());
+}
+} // namespace doris::vectorized \ No newline at end of file diff --git a/regression-test/data/query_p0/sql_functions/conditional_functions/test_if.out b/regression-test/data/query_p0/sql_functions/conditional_functions/test_if.out index 832f69157d4907..8f9419b6de200a 100644 --- a/regression-test/data/query_p0/sql_functions/conditional_functions/test_if.out +++ b/regression-test/data/query_p0/sql_functions/conditional_functions/test_if.out @@ -1,13 +1,13 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !select -- -\N +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- +\N --- !select -- -["1970-01-01", "1970-01-01"] +-- !select -- +["1970-01-01", "1970-01-01"] --- !select -- -["1970-01-01", "1970-01-01"] +-- !select -- +["1970-01-01", "1970-01-01"] --- !select -- -[] +-- !select -- +[]