Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@

namespace doris::segment_v2 {

// Groups a variant path together with a column and a data type.
// NOTE(review): presumably `column` holds the data for `path` and `type`
// describes that column — confirm against the readers that populate it.
struct PathWithColumnAndType {
vectorized::PathInData path;
vectorized::ColumnPtr column;
vectorized::DataTypePtr type;
};

using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;

// Reader for hierarchical variant data; merges it with the root (sparse-encoded columns)
class HierarchicalDataReader : public ColumnIterator {
public:
Expand Down
8 changes: 2 additions & 6 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -907,11 +907,7 @@ void ColumnObject::try_insert(const Field& field) {
}
const auto& object = field.get<const VariantMap&>();
size_t old_size = size();
for (const auto& [key_str, value] : object) {
PathInData key;
if (!key_str.empty()) {
key = PathInData(key_str);
}
for (const auto& [key, value] : object) {
if (!has_subcolumn(key)) {
bool succ = add_sub_column(key, old_size);
if (!succ) {
Expand Down Expand Up @@ -1004,7 +1000,7 @@ void ColumnObject::get(size_t n, Field& res) const {
entry->data.get(n, field);
// Notice: we treat null as empty field, since we do not distinguish null and empty for Variant type.
if (field.get_type() != Field::Types::Null) {
object.try_emplace(entry->path.get_path(), field);
object.try_emplace(entry->path, field);
}
}
if (object.empty()) {
Expand Down
9 changes: 2 additions & 7 deletions be/src/vec/core/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "util/quantile_state.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"

namespace doris {
namespace vectorized {
Expand Down Expand Up @@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple);
DEFINE_FIELD_VECTOR(Map);
#undef DEFINE_FIELD_VECTOR

using FieldMap = std::map<String, Field, std::less<String>>;
#define DEFINE_FIELD_MAP(X) \
struct X : public FieldMap { \
using FieldMap::FieldMap; \
}
DEFINE_FIELD_MAP(VariantMap);
#undef DEFINE_FIELD_MAP
using VariantMap = std::map<PathInData, Field>;

class JsonbField {
public:
Expand Down
10 changes: 8 additions & 2 deletions be/src/vec/data_types/data_type_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
}
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve
++num_of_columns;
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum
MutableColumnPtr sub_column = type->create_column();
buf = type->deserialize(buf, &sub_column, be_exec_version);

// add subcolumn to column_object
PathInData key;
if (!column_meta_pb.name().empty()) {
if (column_meta_pb.has_column_path()) {
// init from path pb
key.from_protobuf(column_meta_pb.column_path());
} else if (!column_meta_pb.name().empty()) {
// init from name for compatibility
key = PathInData {column_meta_pb.name()};
}
// add subcolumn to column_object
column_object->add_sub_column(key, std::move(sub_column), type);
}
size_t num_rows = 0;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void DataTypeSerDe::convert_variant_map_to_rapidjson(
continue;
}
rapidjson::Value key;
key.SetString(item.first.data(), item.first.size());
key.SetString(item.first.get_path().data(), item.first.get_path().size());
rapidjson::Value val;
convert_field_to_rapidjson(item.second, val, allocator);
if (val.IsNull() && item.first.empty()) {
Expand Down
10 changes: 10 additions & 0 deletions be/src/vec/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <vector>

#include "runtime/primitive_type.h"
#include "util/jsonb_writer.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
Expand Down Expand Up @@ -124,6 +125,15 @@ enum class ExtractType {
// Options for JSON parsing.
// NOTE(review): presumably consumed by JSONDataParser — confirm at the call sites.
struct ParseConfig {
// Disabled by default.
// NOTE(review): looks like this toggles flattening of nested values — confirm exact semantics.
bool enable_flatten_nested = false;
};

/// Result of parsing a document.
/// Contains all paths extracted from the document
/// and the values related to them.
/// NOTE(review): presumably paths[i] corresponds to values[i] — confirm against the parser.
struct ParseResult {
std::vector<PathInData> paths;
std::vector<Field> values;
};

template <typename ParserImpl>
class JSONDataParser {
public:
Expand Down
18 changes: 0 additions & 18 deletions be/src/vec/json/path_in_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@
#include <vector>

#include "gen_cpp/segment_v2.pb.h"
#include "vec/columns/column.h"
#include "vec/common/uint128.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"

namespace doris::vectorized {

Expand Down Expand Up @@ -129,13 +126,6 @@ class PathInDataBuilder {
size_t current_anonymous_array_level = 0;
};
using PathsInData = std::vector<PathInData>;
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
struct ParseResult {
std::vector<PathInData> paths;
std::vector<Field> values;
};

struct PathInDataRef {
const PathInData* ref;
Expand All @@ -148,12 +138,4 @@ struct PathInDataRef {
bool operator==(const PathInDataRef& other) const { return *this->ref == *other.ref; }
};

struct PathWithColumnAndType {
PathInData path;
ColumnPtr column;
DataTypePtr type;
};

using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;

} // namespace doris::vectorized
1 change: 1 addition & 0 deletions be/test/vec/columns/column_object_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <gtest/gtest.h>

#include "vec/columns/common_column_test.h"
#include "vec/json/path_in_data.h"

namespace doris::vectorized {

Expand Down
1 change: 1 addition & 0 deletions gensrc/proto/data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ message PColumnMeta {
optional bool result_is_nullable = 6;
optional string function_name = 7;
optional int32 be_exec_version = 8;
optional segment_v2.ColumnPathInfo column_path = 9;
}

message PBlock {
Expand Down
24 changes: 18 additions & 6 deletions regression-test/data/variant_p0/column_name.out
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,25 @@ UPPER CASE lower case
\N
\N
\N
""
""


1234566
16
8888888
"UPPER CASE"
"dkdkdkdkdkd"
"ooaoaaaaaaa"
"xmxxmmmmmm"
UPPER CASE
dkdkdkdkdkd
ooaoaaaaaaa
xmxxmmmmmm

-- !sql_cnt_1 --
128

-- !sql_cnt_2 --
128

-- !sql_cnt_3 --
128

-- !sql_cnt_4 --
128

15 changes: 13 additions & 2 deletions regression-test/suites/variant_p0/column_name.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
properties("replication_num" = "1", "disable_auto_compaction" = "false");
"""

sql """insert into ${table_name} values (1, '{"中文" : "中文", "\\\u4E2C\\\u6587": "unicode"}')"""
Expand Down Expand Up @@ -61,7 +61,18 @@ suite("regression_test_variant_column_name", "variant_type"){
sql """insert into var_column_name values (7, '{"": 1234566}')"""
sql """insert into var_column_name values (7, '{"": 8888888}')"""

qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)"
qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)"

// name with `.`
sql "truncate table var_column_name"
sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
for (int i = 0; i < 7; i++) {
sql """insert into var_column_name select * from var_column_name"""
}
qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"

try {
sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""
Expand Down
Loading