Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 35 additions & 7 deletions be/src/vec/columns/column_variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,6 @@ namespace doris::vectorized {
namespace {

DataTypePtr create_array_of_type(PrimitiveType type, size_t num_dimensions, bool is_nullable) {
if (type == ColumnVariant::MOST_COMMON_TYPE_ID) {
// JSONB type MUST NOT wrapped in ARRAY column, it should be top level.
// So we ignored num_dimensions.
return is_nullable ? make_nullable(std::make_shared<ColumnVariant::MostCommonType>())
: std::make_shared<ColumnVariant::MostCommonType>();
}
DataTypePtr result = type == PrimitiveType::INVALID_TYPE
? is_nullable ? make_nullable(std::make_shared<DataTypeNothing>())
: std::dynamic_pointer_cast<IDataType>(
Expand Down Expand Up @@ -897,10 +891,25 @@ void ColumnVariant::Subcolumn::get(size_t n, Field& res) const {
return;
}
if (is_finalized()) {
// TODO(hangyu) : we should use data type to get the field value
// here is a special case for Array<JsonbField>
if (least_common_type.get_type_id() == PrimitiveType::TYPE_ARRAY &&
least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB) {
// Array of JsonbField is special case
get_finalized_column().get(n, res);
// Here we get either an Array<String> Field or NULL; if it is Array<String>, we need to convert it to Array<JsonbField>.
convert_array_string_to_array_jsonb(res);
return;
}

// here is a special case for JsonbField
if (least_common_type.get_base_type_id() == PrimitiveType::TYPE_JSONB) {
// JsonbField is a special case
res = Field::create_field<TYPE_JSONB>(JsonbField());
get_finalized_column().get(n, res);
return;
}

// common type to get the field value
get_finalized_column().get(n, res);
return;
}
Expand Down Expand Up @@ -2027,4 +2036,23 @@ bool ColumnVariant::try_insert_default_from_nested(const Subcolumns::NodePtr& en
return true;
}

/// Rewrites `array_field` in place so that each String element becomes a JsonbField
/// built from the same bytes (pointer + length of the string).
///
/// The field is left untouched when it is NULL or when its type is not TYPE_ARRAY.
/// Every element is expected to be TYPE_STRING; this is only checked via DCHECK.
void ColumnVariant::Subcolumn::convert_array_string_to_array_jsonb(Field& array_field) {
    // Nothing to convert for NULL or for anything that is not an array.
    if (array_field.is_null() || array_field.get_type() != PrimitiveType::TYPE_ARRAY) {
        return;
    }

    // Build the replacement array separately, then swap it in at the end.
    Field jsonb_array = Field::create_field<TYPE_ARRAY>(Array());
    auto& jsonb_elements = jsonb_array.get<Array&>();
    for (auto& element : array_field.get<Array&>()) {
        DCHECK(element.get_type() == PrimitiveType::TYPE_STRING);
        auto& raw = element.get<String&>();
        jsonb_elements.emplace_back(
                Field::create_field<TYPE_JSONB>(JsonbField(raw.c_str(), raw.size())));
    }
    array_field = std::move(jsonb_array);
}

} // namespace doris::vectorized
4 changes: 4 additions & 0 deletions be/src/vec/columns/column_variant.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class ColumnVariant final : public COWHelper<IColumn, ColumnVariant> {
constexpr static PrimitiveType MOST_COMMON_TYPE_ID = PrimitiveType::TYPE_JSONB;
// Nullable(Array(Nullable(Object)))
const static DataTypePtr NESTED_TYPE;

// Finalize mode for subcolumns: write mode will estimate which subcolumns are sparse columns (too many null values inside the column),
// then merge and encode them into a shared column in the root column. It only takes effect when flushing a block to segments.
// Otherwise read mode should be as default mode.
Expand Down Expand Up @@ -178,6 +179,9 @@ class ColumnVariant final : public COWHelper<IColumn, ColumnVariant> {

void add_new_column_part(DataTypePtr type);

/// Converts Array<String> to Array<JsonbField> for special case handling
static void convert_array_string_to_array_jsonb(Field& array_field);

friend class ColumnVariant;

private:
Expand Down
18 changes: 11 additions & 7 deletions be/src/vec/data_types/convert_field_to_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/exception.h"
#include "common/status.h"
#include "util/bitmap_value.h"
#include "util/jsonb_document.h"
#include "util/jsonb_writer.h"
#include "vec/common/field_visitors.h"
#include "vec/common/typeid_cast.h"
Expand Down Expand Up @@ -90,6 +91,11 @@ class FieldVisitorToJsonb : public StaticVisitor<void> {
writer->writeString(x);
writer->writeEndString();
}
/// Writes an already-encoded JSONB field into `writer`.
///
/// The field's raw bytes (x.get_value(), x.get_size()) are handed to
/// JsonbDocument::checkAndCreateDocument to obtain a document, whose value is then
/// written with writer->writeValue(). A failing status is handled by THROW_IF_ERROR
/// (presumably by throwing — confirm against the macro definition).
void operator()(const JsonbField& x, JsonbWriter* writer) const {
    // Initialize the out-parameter so it never holds an indeterminate value,
    // even if the callee were to return without assigning it.
    JsonbDocument* doc = nullptr;
    THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(), x.get_size(), &doc));
    writer->writeValue(doc->getValue());
}
void operator()(const Array& x, JsonbWriter* writer) const;

void operator()(const Tuple& x, JsonbWriter* writer) const {
Expand Down Expand Up @@ -125,9 +131,6 @@ class FieldVisitorToJsonb : public StaticVisitor<void> {
// Map overload: no JSONB conversion is provided here. The call always throws
// doris::Exception with NOT_IMPLEMENTED_ERROR; neither `x` nor `writer` is read.
void operator()(const Map& x, JsonbWriter* writer) const {
throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
}
// JsonbField overload that always throws doris::Exception with NOT_IMPLEMENTED_ERROR;
// neither `x` nor `writer` is read.
// NOTE(review): an overload with the identical signature that actually writes the value
// is also visible in this class; the two cannot coexist, so this looks like the
// pre-change version — confirm which one is live.
void operator()(const JsonbField& x, JsonbWriter* writer) const {
throw doris::Exception(doris::ErrorCode::NOT_IMPLEMENTED_ERROR, "Not implemeted");
}
};

void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer) const {
Expand All @@ -143,7 +146,7 @@ void FieldVisitorToJsonb::operator()(const Array& x, JsonbWriter* writer) const
namespace {
template <typename From, PrimitiveType T>
Field convert_numeric_type_impl(const Field& from) {
typename PrimitiveTypeTraits<T>::CppType result;
typename PrimitiveTypeTraits<T>::ColumnItemType result;
if (!accurate::convertNumeric(from.get<From>(), result)) {
return {};
}
Expand All @@ -153,11 +156,11 @@ Field convert_numeric_type_impl(const Field& from) {
template <PrimitiveType T>
void convert_numric_type(const Field& from, const IDataType& type, Field* to) {
if (from.get_type() == PrimitiveType::TYPE_BIGINT) {
*to = convert_numeric_type_impl<Int64, TYPE_BIGINT>(from);
*to = convert_numeric_type_impl<Int64, T>(from);
} else if (from.get_type() == PrimitiveType::TYPE_DOUBLE) {
*to = convert_numeric_type_impl<Float64, TYPE_DOUBLE>(from);
*to = convert_numeric_type_impl<Float64, T>(from);
} else if (from.get_type() == PrimitiveType::TYPE_LARGEINT) {
*to = convert_numeric_type_impl<Int128, TYPE_LARGEINT>(from);
*to = convert_numeric_type_impl<Int128, T>(from);
} else {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
Expand Down Expand Up @@ -280,4 +283,5 @@ void convert_field_to_type(const Field& from_value, const IDataType& to_type, Fi
return convert_field_to_typeImpl(from_value, to_type, from_type_hint, to);
}
}
#include "common/compile_check_end.h"
} // namespace doris::vectorized
35 changes: 27 additions & 8 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ struct ConvertNothingToJsonb {
}
};

template <PrimitiveType type, typename ColumnType>
template <PrimitiveType type, typename ColumnType, typename ToDataType>
struct ConvertImplFromJsonb {
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
const uint32_t result, size_t input_rows_count) {
Expand Down Expand Up @@ -799,6 +799,18 @@ struct ConvertImplFromJsonb {
res[i] = 0;
continue;
}

// if value is string, convert by parse, otherwise the result is null if ToDataType is not string
if (value->isString()) {
const auto* blob = value->unpack<JsonbBinaryVal>();
const auto& data = blob->getBlob();
size_t len = blob->getBlobLen();
ReadBuffer rb((char*)(data), len);
bool parsed = try_parse_impl<ToDataType>(res[i], rb, context);
null_map[i] = !parsed;
continue;
}

if constexpr (type == PrimitiveType::TYPE_BOOLEAN) {
// cast from json value to boolean type
if (value->isTrue()) {
Expand Down Expand Up @@ -1690,19 +1702,26 @@ class FunctionCast final : public IFunctionBase {
bool jsonb_string_as_string) const {
switch (to_type->get_primitive_type()) {
case PrimitiveType::TYPE_BOOLEAN:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_BOOLEAN, ColumnUInt8>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_BOOLEAN, ColumnUInt8,
DataTypeUInt8>::execute;
case PrimitiveType::TYPE_TINYINT:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_TINYINT, ColumnInt8>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_TINYINT, ColumnInt8,
DataTypeInt8>::execute;
case PrimitiveType::TYPE_SMALLINT:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_SMALLINT, ColumnInt16>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_SMALLINT, ColumnInt16,
DataTypeInt16>::execute;
case PrimitiveType::TYPE_INT:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_INT, ColumnInt32>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_INT, ColumnInt32,
DataTypeInt32>::execute;
case PrimitiveType::TYPE_BIGINT:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_BIGINT, ColumnInt64>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_BIGINT, ColumnInt64,
DataTypeInt64>::execute;
case PrimitiveType::TYPE_LARGEINT:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_LARGEINT, ColumnInt128>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_LARGEINT, ColumnInt128,
DataTypeInt128>::execute;
case PrimitiveType::TYPE_DOUBLE:
return &ConvertImplFromJsonb<PrimitiveType::TYPE_DOUBLE, ColumnFloat64>::execute;
return &ConvertImplFromJsonb<PrimitiveType::TYPE_DOUBLE, ColumnFloat64,
DataTypeFloat64>::execute;
case PrimitiveType::TYPE_STRING:
case PrimitiveType::TYPE_CHAR:
case PrimitiveType::TYPE_VARCHAR:
Expand Down
12 changes: 11 additions & 1 deletion be/src/vec/json/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
if (element.isObject()) {
traverseObject(element.getObject(), ctx);
} else if (element.isArray()) {
if (ctx.has_nested_in_flatten) {
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
"Nesting of array in Nested array within variant subcolumns is "
"currently not supported.");
}
has_nested = false;
check_has_nested_object(element);
ctx.has_nested_in_flatten = has_nested && ctx.enable_flatten_nested;
if (has_nested && !ctx.enable_flatten_nested) {
// Parse nested arrays to JsonbField
JsonbWriter writer;
Expand All @@ -71,6 +77,8 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
} else {
traverseArray(element.getArray(), ctx);
}
// Reset has_nested_in_flatten once this array has been traversed; otherwise it would remain true for the next array.
ctx.has_nested_in_flatten = false;
} else {
ctx.paths.push_back(ctx.builder.get_parts());
ctx.values.push_back(getValueAsField(element));
Expand Down Expand Up @@ -137,6 +145,7 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArray(const JSONArray& array, ParseContext& ctx) {
/// Traverse elements of array and collect an array of fields by each path.
ParseArrayContext array_ctx;
array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
array_ctx.total_size = array.size();
for (auto it = array.begin(); it != array.end(); ++it) {
traverseArrayElement(*it, array_ctx);
Expand All @@ -162,8 +171,9 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
ParseArrayContext& ctx) {
ParseContext element_ctx;
element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
traverse(element, element_ctx);
auto& [_, paths, values, flatten_nested] = element_ctx;
auto& [_, paths, values, flatten_nested, has_nested] = element_ctx;
size_t size = paths.size();
size_t keys_to_update = ctx.arrays_by_path.size();
for (size_t i = 0; i < size; ++i) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class JSONDataParser {
std::vector<PathInData::Parts> paths;
std::vector<Field> values;
bool enable_flatten_nested = false;
bool has_nested_in_flatten = false;
};
using PathPartsWithArray = std::pair<PathInData::Parts, Array>;
using PathToArray = phmap::flat_hash_map<UInt128, PathPartsWithArray, UInt128TrivialHash>;
Expand All @@ -155,6 +156,7 @@ class JSONDataParser {
size_t total_size = 0;
PathToArray arrays_by_path;
KeyToSizes nested_sizes_by_key;
bool has_nested_in_flatten = false;
};
void traverse(const Element& element, ParseContext& ctx);
void traverseObject(const JSONObject& object, ParseContext& ctx);
Expand Down
Loading
Loading