Skip to content
6 changes: 0 additions & 6 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,6 @@ DataTypePtr create_array_of_type(TypeIndex type, size_t num_dimensions, bool is_
if (type == TypeIndex::Nothing) {
return std::make_shared<DataTypeNothing>();
}
if (type == ColumnObject::MOST_COMMON_TYPE_ID) {
// JSONB type MUST NOT be wrapped in an ARRAY column; it should be top level.
// So we ignore num_dimensions.
return is_nullable ? make_nullable(std::make_shared<ColumnObject::MostCommonType>())
: std::make_shared<ColumnObject::MostCommonType>();
}
DataTypePtr result =
DataTypeFactory::instance().create_data_type(type, is_nullable, precision, scale);
for (size_t i = 0; i < num_dimensions; ++i) {
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
constexpr static TypeIndex MOST_COMMON_TYPE_ID = TypeIndex::JSONB;
// Nullable(Array(Nullable(Object)))
const static DataTypePtr NESTED_TYPE;

// Finalize mode for subcolumns. Write mode estimates which subcolumns are sparse columns (too many null values inside the column),
// then merges and encodes them into a shared column in the root column. It only takes effect when flushing a block to segments.
// Otherwise read mode should be used as the default mode.
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/convert_field_to_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,4 +335,4 @@ void convert_field_to_type(const Field& from_value, const IDataType& to_type, Fi
return convert_field_to_typeImpl(from_value, to_type, from_type_hint, to);
}
}
} // namespace doris::vectorized
} // namespace doris::vectorized
12 changes: 11 additions & 1 deletion be/src/vec/json/json_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
if (element.isObject()) {
traverseObject(element.getObject(), ctx);
} else if (element.isArray()) {
if (ctx.has_nested_in_flatten) {
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
"Nesting of array in Nested array within variant subcolumns is "
"currently not supported.");
}
has_nested = false;
check_has_nested_object(element);
ctx.has_nested_in_flatten = has_nested && ctx.enable_flatten_nested;
if (has_nested && !ctx.enable_flatten_nested) {
// Parse nested arrays to JsonbField
JsonbWriter writer;
Expand All @@ -71,6 +77,8 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
} else {
traverseArray(element.getArray(), ctx);
}
// Reset has_nested_in_flatten to false once this array has been traversed; otherwise it would still be true for the next array.
ctx.has_nested_in_flatten = false;
} else {
ctx.paths.push_back(ctx.builder.get_parts());
ctx.values.push_back(getValueAsField(element));
Expand Down Expand Up @@ -137,6 +145,7 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArray(const JSONArray& array, ParseContext& ctx) {
/// Traverse elements of array and collect an array of fields by each path.
ParseArrayContext array_ctx;
array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
array_ctx.total_size = array.size();
for (auto it = array.begin(); it != array.end(); ++it) {
traverseArrayElement(*it, array_ctx);
Expand All @@ -162,8 +171,9 @@ template <typename ParserImpl>
void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
ParseArrayContext& ctx) {
ParseContext element_ctx;
element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
traverse(element, element_ctx);
auto& [_, paths, values, flatten_nested] = element_ctx;
auto& [_, paths, values, flatten_nested, has_nested] = element_ctx;
size_t size = paths.size();
size_t keys_to_update = ctx.arrays_by_path.size();
for (size_t i = 0; i < size; ++i) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class JSONDataParser {
std::vector<PathInData::Parts> paths;
std::vector<Field> values;
bool enable_flatten_nested = false;
bool has_nested_in_flatten = false;
};
using PathPartsWithArray = std::pair<PathInData::Parts, Array>;
using PathToArray = phmap::flat_hash_map<UInt128, PathPartsWithArray, UInt128TrivialHash>;
Expand All @@ -154,6 +155,7 @@ class JSONDataParser {
size_t total_size = 0;
PathToArray arrays_by_path;
KeyToSizes nested_sizes_by_key;
bool has_nested_in_flatten = false;
};
void traverse(const Element& element, ParseContext& ctx);
void traverseObject(const JSONObject& object, ParseContext& ctx);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
column: variant with allocate size: 40073536
40073536
column: variant with allocate size: 180480
180480
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
column: variant with byte_size: 21849217
21849217
column: variant with byte_size: 1610
1610
Original file line number Diff line number Diff line change
@@ -1 +1 @@
column: variant with hashes: 589097515 with ptr: 2023
column: variant with hashes: 1798947085 with ptr: 15
Original file line number Diff line number Diff line change
@@ -1 +1 @@
column: variant with hashes: 589097515 with ptr: 2023
column: variant with hashes: 1798947085 with ptr: 15

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
column: variant with hashes: 1417894694298673530 with ptr: 2023
column: variant with hashes: 11329750091889114476 with ptr: 15
Original file line number Diff line number Diff line change
@@ -1 +1 @@
column: variant with hashes: 1417894694298673530 with ptr: 2023
column: variant with hashes: 11329750091889114476 with ptr: 15
15 changes: 11 additions & 4 deletions be/test/vec/columns/column_object_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,10 +634,17 @@ TEST(ColumnVariantTest, advanced_insert_range_from) {
}
} else if (column->path.get_path().size() == 5) {
EXPECT_EQ(column->data.get_non_null_value_size(), 10);
EXPECT_EQ(assert_cast<const DataTypeNullable*>(column->data.data_types[0].get())
->get_nested_type()
->get_type_id(),
TypeIndex::JSONB);
if (column->path.get_path() == "v.d.d") {
EXPECT_EQ(assert_cast<const DataTypeNullable*>(column->data.data_types[0].get())
->get_nested_type()
->get_type_id(),
TypeIndex::Array);
} else if (column->path.get_path() == "v.c.d") {
EXPECT_EQ(assert_cast<const DataTypeNullable*>(column->data.data_types[0].get())
->get_nested_type()
->get_type_id(),
TypeIndex::JSONB);
}
for (size_t row = 0; row < 5; ++row) {
EXPECT_TRUE(column->data.data[0]->is_null_at(row));
}
Expand Down
60 changes: 11 additions & 49 deletions be/test/vec/columns/column_variant_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,14 @@ static ColumnObject::MutablePtr column_variant;
class ColumnObjectTest : public CommonColumnTest {
protected:
// One-time setup for the whole test suite: reads the root directory from the
// environment, creates the shared variant column, and derives the directories
// holding the test input data and the expected results.
// NOTE(review): this span appears to interleave two revisions of the function —
// root_dir, column_variant, test_data_dir and test_result_dir are each assigned
// twice, so only the later assignment of each survives. Confirm which revision
// is the intended one.
static void SetUpTestSuite() {
// NOTE(review): getenv() returns nullptr when the variable is unset, and building a
// std::string from nullptr is undefined behavior — confirm the test runner always sets it.
root_dir = std::string(getenv("ROOT"));
column_variant = VariantUtil::construct_advanced_varint_column();
std::cout << column_variant->get_name() << std::endl;
// Overwrites the value read from "ROOT" above.
root_dir = std::string(getenv("DORIS_HOME"));
// which is /root/doris/be/ut_build_ASAN/test//
std::cout << "root_dir: " << root_dir << std::endl;
test_data_dir = root_dir + "/be/test/data/vec/columns";
test_result_dir = root_dir + "/be/test/expected_result/vec/columns";

// Replaces the column constructed by VariantUtil above with a freshly created one.
column_variant = ColumnObject::create(true);
std::cout << dt_variant->get_name() << std::endl;

load_json_columns_data();
// These paths are appended directly to root_dir with no leading separator, so they
// presumably rely on root_dir ending in '/' — TODO confirm.
test_data_dir = root_dir + "../../../be/test/data/vec/columns";
test_result_dir = root_dir + "../../../be/test/expected_result/vec/columns";
//load_json_columns_data();
}

static void load_json_columns_data() {
Expand Down Expand Up @@ -220,11 +219,7 @@ TEST_F(ColumnObjectTest, field_test) {
}
};
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
obj = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(!obj->empty());
test_func(obj);
}
Expand Down Expand Up @@ -750,11 +745,7 @@ TEST_F(ColumnObjectTest, get_subcolumn) {

TEST_F(ColumnObjectTest, ensure_root_node_type) {
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
obj = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(!obj->empty());
// Store original root type
auto root = obj->get_subcolumns().get_root();
Expand Down Expand Up @@ -1188,11 +1179,7 @@ TEST_F(ColumnObjectTest, find_path_lower_bound_in_sparse_data) {
}
};
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
obj = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(!obj->empty());
std::cout << "column variant size: " << obj->size() << std::endl;
test_func(obj);
Expand All @@ -1201,11 +1188,7 @@ TEST_F(ColumnObjectTest, find_path_lower_bound_in_sparse_data) {
// used in SparseColumnExtractIterator::_fill_path_column
TEST_F(ColumnObjectTest, fill_path_column_from_sparse_data) {
ColumnObject::MutablePtr obj;
obj = ColumnObject::create(1);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl";
load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj);
obj = VariantUtil::construct_advanced_varint_column();
EXPECT_TRUE(!obj->empty());
auto sparse_col = obj->get_sparse_column();
auto cloned_sparse = sparse_col->clone_empty();
Expand All @@ -1226,27 +1209,6 @@ TEST_F(ColumnObjectTest, fill_path_column_from_sparse_data) {
EXPECT_ANY_THROW(obj->check_consistency());
}

// Loads two JSON-lines fixtures into a fresh variant column and checks that,
// before finalization, neither the column nor any of its subcolumns reports
// itself as finalized, and that asking a subcolumn for its finalized column throws.
TEST_F(ColumnObjectTest, not_finalized) {
    ColumnObject::MutablePtr variant = ColumnObject::create(1);
    MutableColumns targets;
    targets.push_back(variant->get_ptr());

    // Append both fixtures, in this order, to the same column.
    const auto object_fixture = test_data_dir_json + "json_variant/object_boundary.jsonl";
    const auto array_fixture = test_data_dir_json + "json_variant/array_object_boundary.jsonl";
    load_columns_data_from_file(targets, serde, '\n', {0}, object_fixture);
    load_columns_data_from_file(targets, serde, '\n', {0}, array_fixture);

    // The two fixtures together are expected to yield 200 rows, still unfinalized.
    EXPECT_TRUE(variant->size() == 200);
    EXPECT_FALSE(variant->is_finalized());

    // Every subcolumn must exist, be unfinalized, and reject both
    // get_finalized_column_ptr() and get_finalized_column().
    auto nodes = variant->get_subcolumns();
    for (const auto& node : nodes) {
        EXPECT_TRUE(node != nullptr);
        EXPECT_FALSE(node->data.is_finalized());
        EXPECT_ANY_THROW(node->data.get_finalized_column_ptr());
        EXPECT_ANY_THROW(node->data.get_finalized_column());
    }
}

doris::vectorized::Field get_field_v2(std::string_view type, size_t array_element_cnt = 0) {
static std::unordered_map<std::string_view, doris::vectorized::Field> field_map;
if (field_map.empty()) {
Expand Down
Loading
Loading