Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,11 @@ Status HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c
MutableColumnPtr column = _root_reader->column->get_ptr();
// container_variant.add_sub_column({}, std::move(column), _root_reader->type);
DCHECK(column->size() == nrows);
auto nullable_column = make_nullable(column->get_ptr());
auto type = make_nullable(_root_reader->type);
// make sure the root type is nullable
container =
ColumnObject::create(max_subcolumns_count, _root_reader->type, std::move(column));
ColumnObject::create(max_subcolumns_count, type, nullable_column->assume_mutable());
} else {
auto root_type =
vectorized::DataTypeFactory::instance().create_data_type(TypeIndex::Nothing, false);
Expand Down Expand Up @@ -485,6 +488,26 @@ Status HierarchicalDataIterator::_init_null_map_and_clear_columns(
dst_null_map.insert_range_from(*fake_nullable_column, 0, nrows);
}
}
// The root column's null map needs to be reset: src_null_map comes from the whole
// variant column, but root column rows should be marked null when they are empty.
ColumnObject* variant = nullptr;
if (dst->is_nullable()) {
variant =
&assert_cast<ColumnObject&>(assert_cast<ColumnNullable&>(*dst).get_nested_column());
} else {
variant = &assert_cast<ColumnObject&>(*dst);
}
if (_path.get_parts().empty()) {
// update nullmap for root column, since the original nullmap is from the whole variant column
auto& dst_map_data =
assert_cast<ColumnNullable&>(*variant->get_root()).get_null_map_column().get_data();
for (size_t i = 0; i < variant->get_root()->size(); ++i) {
StringRef ref = variant->get_root()->get_data_at(i);
if (ref.size == 0) {
dst_map_data[i] = 1; // mark null when root jsonb is empty
}
}
}
return Status::OK();
}

Expand Down
38 changes: 17 additions & 21 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,31 +1415,37 @@ const ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key
return &node->data;
}

const std::string_view EMPTY_JSON = "{}";
size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& output,
DataTypeSerDe::FormatOptions opt) const {
if (least_common_type.get_base_type_id() == TypeIndex::Nothing) {
output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(),
DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size());
return DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size();
output.write(EMPTY_JSON.data(), EMPTY_JSON.size());
return EMPTY_JSON.size();
}

size_t ind = n;
if (ind < num_of_defaults_in_prefix) {
output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(),
DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size());
return DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size();
output.write(EMPTY_JSON.data(), EMPTY_JSON.size());
return EMPTY_JSON.size();
}

ind -= num_of_defaults_in_prefix;
for (size_t i = 0; i < data.size(); ++i) {
const auto& part = data[i];
const auto& part = (*data[i]);
const auto& part_type_serde = data_serdes[i];

if (ind < part->size()) {
return part_type_serde->serialize_one_cell_to_json(*part, ind, output, opt);
if (ind < part.size()) {
// Special case: the null flag is set but the value is an empty string in the JSON type.
// Write "{}" here; otherwise the cell would be serialized as '\N'.
const auto* nullable_col = check_and_get_column<ColumnNullable>(*data[i]);
if (nullable_col && nullable_col->is_null_at(ind)) {
output.write(EMPTY_JSON.data(), EMPTY_JSON.size());
return EMPTY_JSON.size();
}
return part_type_serde->serialize_one_cell_to_json(part, ind, output, opt);
}

ind -= part->size();
ind -= part.size();
}
throw doris::Exception(ErrorCode::OUT_OF_BOUND,
"Index ({}) for serializing JSON is out of range", n);
Expand Down Expand Up @@ -1725,17 +1731,7 @@ bool ColumnObject::is_visible_root_value(size_t nrow) const {
return false;
}
}
size_t ind = nrow - root->data.num_of_defaults_in_prefix;
// null value as empty json, todo: think of a better way to distinguish empty json and null json.
for (const auto& part : root->data.data) {
if (ind < part->size()) {
return !part->get_data_at(ind).empty();
}
ind -= part->size();
}

throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range",
nrow);
return !root->data.is_null_at(nrow);
}

void ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWritable& output,
Expand Down
6 changes: 3 additions & 3 deletions regression-test/data/variant_p0/test_sub_path_pruning.out
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,12 @@
{"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}

-- !sql_xxx --
0 {}
0
1 {"c":{"d":{"e":11}}}

-- !sql --

{"c":{"d":{"e":11}}}
{}

-- !sql --
1 1
Expand All @@ -248,8 +248,8 @@


-- !sql --

{"e":11}
{}

-- !sql --
1 1
Expand Down
17 changes: 17 additions & 0 deletions regression-test/data/variant_p0/variant_hirachinal.out
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,21 @@
{"d":2.00000}
{}
{"d":6.00000}
\N
\N

-- !sql --
{"a":1,"b":2,"c":{"d":2.00000}}
{"a":3,"b":4}
{"c":{"d":6.00000}}
\N
{}

-- !sql --
1 {} {}
2 {} {}
3 \N {"a":1,"b":2,"c":3,"d":4}

-- !sql --
3 \N {"a":1,"b":2,"c":3,"d":4}

13 changes: 13 additions & 0 deletions regression-test/suites/variant_p0/variant_hirachinal.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ suite("regression_test_variant_hirachinal", "variant_type"){
order_qt_sql2 "select cast(v['c'] as string) from var_rs where k = -3 or k = -2 or k = 1 order by k, cast(v['c'] as text) limit 3"


table_name = "var_rs2"
sql "DROP TABLE IF EXISTS ${table_name}"

sql """
Expand Down Expand Up @@ -70,7 +71,19 @@ suite("regression_test_variant_hirachinal", "variant_type"){

sql """insert into ${table_name} values (1, '{"a": 1, "b": 2, "c" : {"d" : 2}}'), (2, '{"a": 3, "b": 4}');"""
sql """insert into ${table_name} values (3, '{"c": {"d": 6}}');"""
sql """insert into ${table_name} values (4, NULL);"""
sql """insert into ${table_name} values (5, '{}');"""

qt_sql """select v['c'] from ${table_name} order by k;"""
qt_sql """select v from ${table_name} order by k;"""

sql "DROP TABLE IF EXISTS t"
sql """create table t(a int, v variant, vn variant not null) PROPERTIES ("replication_allocation" = "tag.location.default: 1");"""
sql """insert into t values(1, '{}', '{}');"""
sql """insert into t values(2, '{}', '{}');"""
sql """insert into t values(3, NULL, '{"a" : 1, "b" : 2, "c" : 3, "d" : 4}');"""
qt_sql """select * from t order by a;"""
qt_sql """select * from t where v is null;"""


}
Loading