-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[Serde](Variant) support arrow serialization for variant type #32780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |||||||||
|
|
||||||||||
| #include <rapidjson/stringbuffer.h> | ||||||||||
|
|
||||||||||
| #include "common/exception.h" | ||||||||||
| #include "common/status.h" | ||||||||||
| #include "vec/columns/column.h" | ||||||||||
| #include "vec/columns/column_object.h" | ||||||||||
|
|
@@ -95,6 +96,28 @@ void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV | |||||||||
| variant.insert(field); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| void DataTypeObjectSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map, | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: method 'write_column_to_arrow' can be made static [readability-convert-member-functions-to-static]
Suggested change
be/src/vec/data_types/serde/data_type_object_serde.cpp:99: - int end, const cctz::time_zone& ctz) const {
+ int end, const cctz::time_zone& ctz) {
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: method 'write_column_to_arrow' can be made static [readability-convert-member-functions-to-static]
Suggested change
be/src/vec/data_types/serde/data_type_object_serde.cpp:100: - int end, const cctz::time_zone& ctz) const {
+ int end, const cctz::time_zone& ctz) { |
||||||||||
| arrow::ArrayBuilder* array_builder, int start, | ||||||||||
| int end, const cctz::time_zone& ctz) const { | ||||||||||
| const auto* var = check_and_get_column<ColumnObject>(column); | ||||||||||
| auto& builder = assert_cast<arrow::StringBuilder&>(*array_builder); | ||||||||||
| for (size_t i = start; i < end; ++i) { | ||||||||||
| if (null_map && (*null_map)[i]) { | ||||||||||
| checkArrowStatus(builder.AppendNull(), column.get_name(), | ||||||||||
| array_builder->type()->name()); | ||||||||||
| } else { | ||||||||||
| std::string serialized_value; | ||||||||||
| if (!var->serialize_one_row_to_string(i, &serialized_value)) { | ||||||||||
| throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Failed to serialize variant {}", | ||||||||||
| var->dump_structure()); | ||||||||||
| } | ||||||||||
| checkArrowStatus(builder.Append(serialized_value.data(), | ||||||||||
| static_cast<int>(serialized_value.size())), | ||||||||||
| column.get_name(), array_builder->type()->name()); | ||||||||||
| } | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| } // namespace vectorized | ||||||||||
|
|
||||||||||
| } // namespace doris | ||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,10 +73,7 @@ class DataTypeObjectSerDe : public DataTypeSerDe { | |
|
|
||
| void write_column_to_arrow(const IColumn& column, const NullMap* null_map, | ||
| arrow::ArrayBuilder* array_builder, int start, int end, | ||
| const cctz::time_zone& ctz) const override { | ||
| throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, | ||
| "write_column_to_arrow with type " + column.get_name()); | ||
| } | ||
| const cctz::time_zone& ctz) const override; | ||
| void read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int start, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to support reading from Arrow as well? In some Spark scenarios, a string value needs to be loaded into Doris as a variant-typed value.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently this PR is meant to solve the connector problem for variant; for reading from Arrow we could use a cast expression like cast(xxx as variant). |
||
| int end, const cctz::time_zone& ctz) const override { | ||
| throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: method 'write_column_to_arrow' can be made static [readability-convert-member-functions-to-static]
be/src/vec/data_types/serde/data_type_object_serde.cpp:99: