diff --git a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp index 77e23d264fb6ee..96e31010de8ea8 100644 --- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp @@ -254,8 +254,11 @@ Status HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c MutableColumnPtr column = _root_reader->column->get_ptr(); // container_variant.add_sub_column({}, std::move(column), _root_reader->type); DCHECK(column->size() == nrows); + auto nullable_column = make_nullable(column->get_ptr()); + auto type = make_nullable(_root_reader->type); + // make sure the root type is nullable container = - ColumnObject::create(max_subcolumns_count, _root_reader->type, std::move(column)); + ColumnObject::create(max_subcolumns_count, type, nullable_column->assume_mutable()); } else { auto root_type = vectorized::DataTypeFactory::instance().create_data_type(TypeIndex::Nothing, false); @@ -485,6 +488,26 @@ Status HierarchicalDataIterator::_init_null_map_and_clear_columns( dst_null_map.insert_range_from(*fake_nullable_column, 0, nrows); } } + // root column nullmap need to be reset, for example, the src_null_map is from the whole + // variant column, but the root column rows should reset to null when empty + ColumnObject* variant = nullptr; + if (dst->is_nullable()) { + variant = + &assert_cast(assert_cast(*dst).get_nested_column()); + } else { + variant = &assert_cast(*dst); + } + if (_path.get_parts().empty()) { + // update nullmap for root column, since the original nullmap is from the whole variant column + auto& dst_map_data = + assert_cast(*variant->get_root()).get_null_map_column().get_data(); + for (size_t i = 0; i < variant->get_root()->size(); ++i) { + StringRef ref = variant->get_root()->get_data_at(i); + if (ref.size == 0) { + dst_map_data[i] = 1; // mark null when root jsonb is empty + } + } + } return Status::OK(); } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 361e108029da42..b7ff43a68d3c56 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1415,31 +1415,37 @@ const ColumnObject::Subcolumn* ColumnObject::get_subcolumn(const PathInData& key return &node->data; } +const std::string_view EMPTY_JSON = "{}"; size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& output, DataTypeSerDe::FormatOptions opt) const { if (least_common_type.get_base_type_id() == TypeIndex::Nothing) { - output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(), - DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size()); - return DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size(); + output.write(EMPTY_JSON.data(), EMPTY_JSON.size()); + return EMPTY_JSON.size(); } size_t ind = n; if (ind < num_of_defaults_in_prefix) { - output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(), - DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size()); - return DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size(); + output.write(EMPTY_JSON.data(), EMPTY_JSON.size()); + return EMPTY_JSON.size(); } ind -= num_of_defaults_in_prefix; for (size_t i = 0; i < data.size(); ++i) { - const auto& part = data[i]; + const auto& part = (*data[i]); const auto& part_type_serde = data_serdes[i]; - if (ind < part->size()) { - return part_type_serde->serialize_one_cell_to_json(*part, ind, output, opt); + if (ind < part.size()) { + // special case when null flag is true, but the value is empty string in JSON type, + // other wise will serialize to '\N' + const auto* nullable_col = check_and_get_column(*data[i]); + if (nullable_col && nullable_col->is_null_at(ind)) { + output.write(EMPTY_JSON.data(), EMPTY_JSON.size()); + return EMPTY_JSON.size(); + } + return part_type_serde->serialize_one_cell_to_json(part, ind, output, opt); } - ind -= part->size(); + ind -= part.size(); } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for serializing JSON is out of range", n); @@ -1725,17 +1731,7 @@ bool ColumnObject::is_visible_root_value(size_t nrow) const { return false; } } - size_t ind = nrow - root->data.num_of_defaults_in_prefix; - // null value as empty json, todo: think a better way to disinguish empty json and null json. - for (const auto& part : root->data.data) { - if (ind < part->size()) { - return !part->get_data_at(ind).empty(); - } - ind -= part->size(); - } - - throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", - nrow); + return !root->data.is_null_at(nrow); } void ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWritable& output, diff --git a/regression-test/data/variant_p0/test_sub_path_pruning.out b/regression-test/data/variant_p0/test_sub_path_pruning.out index 0c78380b131c0c..4cf36fbc57e2eb 100644 --- a/regression-test/data/variant_p0/test_sub_path_pruning.out +++ b/regression-test/data/variant_p0/test_sub_path_pruning.out @@ -228,12 +228,12 @@ {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14} -- !sql_xxx -- -0 {} +0 1 {"c":{"d":{"e":11}}} -- !sql -- + {"c":{"d":{"e":11}}} -{} -- !sql -- 1 1 @@ -248,8 +248,8 @@ -- !sql -- + {"e":11} -{} -- !sql -- 1 1 diff --git a/regression-test/data/variant_p0/variant_hirachinal.out b/regression-test/data/variant_p0/variant_hirachinal.out index 48ed296732668c..c23fce092212c0 100644 --- a/regression-test/data/variant_p0/variant_hirachinal.out +++ b/regression-test/data/variant_p0/variant_hirachinal.out @@ -38,4 +38,21 @@ {"d":2.00000} {} {"d":6.00000} +\N +\N + +-- !sql -- +{"a":1,"b":2,"c":{"d":2.00000}} +{"a":3,"b":4} +{"c":{"d":6.00000}} +\N +{} + +-- !sql -- +1 {} {} +2 {} {} +3 \N {"a":1,"b":2,"c":3,"d":4} + +-- !sql -- +3 \N {"a":1,"b":2,"c":3,"d":4} diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy b/regression-test/suites/variant_p0/variant_hirachinal.groovy index 3336ca4d23f70a..54732efa789686 100644 --- a/regression-test/suites/variant_p0/variant_hirachinal.groovy +++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy @@ -39,6 +39,7 @@ suite("regression_test_variant_hirachinal", "variant_type"){ order_qt_sql2 "select cast(v['c'] as string) from var_rs where k = -3 or k = -2 or k = 1 order by k, cast(v['c'] as text) limit 3" + table_name = "var_rs2" sql "DROP TABLE IF EXISTS ${table_name}" sql """ @@ -70,7 +71,19 @@ suite("regression_test_variant_hirachinal", "variant_type"){ sql """insert into ${table_name} values (1, '{"a": 1, "b": 2, "c" : {"d" : 2}}'), (2, '{"a": 3, "b": 4}');""" sql """insert into ${table_name} values (3, '{"c": {"d": 6}}');""" + sql """insert into ${table_name} values (4, NULL);""" + sql """insert into ${table_name} values (5, '{}');""" qt_sql """select v['c'] from ${table_name} order by k;""" + qt_sql """select v from ${table_name} order by k;""" + + sql "DROP TABLE IF EXISTS t" + sql """create table t(a int, v variant, vn variant not null) PROPERTIES ("replication_allocation" = "tag.location.default: 1");""" + sql """insert into t values(1, '{}', '{}');""" + sql """insert into t values(2, '{}', '{}');""" + sql """insert into t values(3, NULL, '{"a" : 1, "b" : 2, "c" : 3, "d" : 4}');""" + qt_sql """select * from t order by a;""" + qt_sql """select * from t where v is null;""" + } \ No newline at end of file