Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ struct SubcolumnReader {
};
using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;

// Bundles one variant subcolumn's structured path together with the
// materialized column data and its type, so hierarchical readers can carry
// all three through merge/assembly steps as a single unit.
struct PathWithColumnAndType {
// Structured path of the subcolumn inside the variant (e.g. a.b.c).
vectorized::PathInData path;
// Materialized column holding the subcolumn's values (immutable handle).
vectorized::ColumnPtr column;
// Runtime type of `column`.
vectorized::DataTypePtr type;
};

// Collection of (path, column, type) triples, e.g. all subcolumns extracted
// for one variant root.
using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;

// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
class HierarchicalDataReader : public ColumnIterator {
public:
Expand Down
8 changes: 2 additions & 6 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,11 +805,7 @@ void ColumnObject::try_insert(const Field& field) {
}
const auto& object = field.get<const VariantMap&>();
size_t old_size = size();
for (const auto& [key_str, value] : object) {
PathInData key;
if (!key_str.empty()) {
key = PathInData(key_str);
}
for (const auto& [key, value] : object) {
if (!has_subcolumn(key)) {
bool succ = add_sub_column(key, old_size);
if (!succ) {
Expand Down Expand Up @@ -894,7 +890,7 @@ void ColumnObject::get(size_t n, Field& res) const {
auto& object = res.get<VariantMap&>();

for (const auto& entry : subcolumns) {
auto it = object.try_emplace(entry->path.get_path()).first;
auto it = object.try_emplace(entry->path).first;
entry->data.get(n, it->second);
}
}
Expand Down
9 changes: 2 additions & 7 deletions be/src/vec/core/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "util/quantile_state.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"

namespace doris {
namespace vectorized {
Expand Down Expand Up @@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple);
DEFINE_FIELD_VECTOR(Map);
#undef DEFINE_FIELD_VECTOR

using FieldMap = std::map<String, Field, std::less<String>>;
#define DEFINE_FIELD_MAP(X) \
struct X : public FieldMap { \
using FieldMap::FieldMap; \
}
DEFINE_FIELD_MAP(VariantMap);
#undef DEFINE_FIELD_MAP
// Variant value as a map from structured subcolumn path to its Field value.
// Keys are PathInData (not flat String) so path identity/ordering follows the
// structured path comparison — assumes PathInData provides a strict weak
// ordering (operator< or comparator); TODO(review): confirm in path_in_data.h.
using VariantMap = std::map<PathInData, Field>;

class JsonbField {
public:
Expand Down
10 changes: 8 additions & 2 deletions be/src/vec/data_types/data_type_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
}
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve
++num_of_columns;
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, IColumn* column,
MutableColumnPtr sub_column = type->create_column();
buf = type->deserialize(buf, sub_column.get(), be_exec_version);

// add subcolumn to column_object
PathInData key;
if (!column_meta_pb.name().empty()) {
if (column_meta_pb.has_column_path()) {
// init from path pb
key.from_protobuf(column_meta_pb.column_path());
} else if (!column_meta_pb.name().empty()) {
// init from name for compatible
key = PathInData {column_meta_pb.name()};
}
// add subcolumn to column_object
column_object->add_sub_column(key, std::move(sub_column), type);
}
size_t num_rows = 0;
Expand Down
8 changes: 8 additions & 0 deletions be/src/vec/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <vector>

#include "runtime/primitive_type.h"
#include "util/jsonb_writer.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
Expand Down Expand Up @@ -120,6 +121,13 @@ enum class ExtractType {
ToString = 0,
// ...
};
/// Result of parsing a document.
/// Contains all paths extracted from the document
/// and the values associated with them.
struct ParseResult {
// Extracted paths; paths[i] corresponds to values[i].
std::vector<PathInData> paths;
// Parsed values, parallel to `paths`.
std::vector<Field> values;
};
template <typename ParserImpl, bool parse_nested = false>
class JSONDataParser {
public:
Expand Down
8 changes: 0 additions & 8 deletions be/src/vec/json/path_in_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@

#include "gen_cpp/segment_v2.pb.h"
#include "vec/common/uint128.h"
#include "vec/core/field.h"
#include "vec/core/types.h"

namespace doris::vectorized {
Expand Down Expand Up @@ -123,13 +122,6 @@ class PathInDataBuilder {
size_t current_anonymous_array_level = 0;
};
using PathsInData = std::vector<PathInData>;
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
struct ParseResult {
std::vector<PathInData> paths;
std::vector<Field> values;
};

struct PathInDataRef {
const PathInData* ref;
Expand Down
1 change: 1 addition & 0 deletions be/test/vec/columns/column_object_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <gtest/gtest.h>

#include "vec/columns/common_column_test.h"
#include "vec/json/path_in_data.h"

namespace doris::vectorized {

Expand Down
2 changes: 2 additions & 0 deletions gensrc/proto/data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ message PColumnMeta {
repeated PColumnMeta children = 5;
optional bool result_is_nullable = 6;
optional string function_name = 7;
optional int32 be_exec_version = 8;
optional segment_v2.ColumnPathInfo column_path = 9;
}

message PBlock {
Expand Down
24 changes: 18 additions & 6 deletions regression-test/data/variant_p0/column_name.out
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,25 @@ UPPER CASE lower case
\N
\N
\N
""
""


1234566
16
8888888
"UPPER CASE"
"dkdkdkdkdkd"
"ooaoaaaaaaa"
"xmxxmmmmmm"
UPPER CASE
dkdkdkdkdkd
ooaoaaaaaaa
xmxxmmmmmm

-- !sql_cnt_1 --
128

-- !sql_cnt_2 --
128

-- !sql_cnt_3 --
128

-- !sql_cnt_4 --
128

15 changes: 13 additions & 2 deletions regression-test/suites/variant_p0/column_name.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
properties("replication_num" = "1", "disable_auto_compaction" = "false");
"""

// sql "set experimental_enable_nereids_planner = false"
Expand Down Expand Up @@ -63,7 +63,18 @@ suite("regression_test_variant_column_name", "variant_type"){
sql """insert into var_column_name values (7, '{"": 1234566}')"""
sql """insert into var_column_name values (7, '{"": 8888888}')"""

qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)"
qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)"

// name with `.`
sql "truncate table var_column_name"
sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
for (int i = 0; i < 7; i++) {
sql """insert into var_column_name select * from var_column_name"""
}
qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"

try {
sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""
Expand Down
Loading