Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@

namespace doris::segment_v2 {

// Associates one variant subcolumn with its JSON path and data type.
// Moved here from vec/json/path_in_data.h so that path_in_data.h no longer
// needs to include column/data-type headers (breaks an include dependency).
struct PathWithColumnAndType {
// Hierarchical path of the subcolumn inside the variant (e.g. "a.b.c").
vectorized::PathInData path;
// Column holding the values stored at this path.
vectorized::ColumnPtr column;
// Type of the values in `column`.
vectorized::DataTypePtr type;
};

// Flat list of (path, column, type) triples for a variant's subcolumns.
using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;

// Reader for hierarchical data for variant, merge with root(sparse encoded columns)
class HierarchicalDataReader : public ColumnIterator {
public:
Expand Down
8 changes: 2 additions & 6 deletions be/src/vec/columns/column_variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -861,11 +861,7 @@ void ColumnVariant::try_insert(const Field& field) {
}
const auto& object = field.get<const VariantMap&>();
size_t old_size = size();
for (const auto& [key_str, value] : object) {
PathInData key;
if (!key_str.empty()) {
key = PathInData(key_str);
}
for (const auto& [key, value] : object) {
if (!has_subcolumn(key)) {
bool succ = add_sub_column(key, old_size);
if (!succ) {
Expand Down Expand Up @@ -958,7 +954,7 @@ void ColumnVariant::get(size_t n, Field& res) const {
entry->data.get(n, field);
// Notice: we treat null as empty field, since we do not distinguish null and empty for Variant type.
if (field.get_type() != PrimitiveType::TYPE_NULL) {
object.try_emplace(entry->path.get_path(), field);
object.try_emplace(entry->path, field);
}
}
if (object.empty()) {
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/core/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "util/quantile_state.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"

namespace doris {
template <PrimitiveType type>
Expand Down Expand Up @@ -82,7 +83,7 @@ struct Map : public FieldVector {
using FieldVector::FieldVector;
};

using VariantMap = std::map<String, Field>;
using VariantMap = std::map<PathInData, Field>;

//TODO: rethink if we really need this? it only save one pointer from std::string
// not POD type so could only use read/write_json_binary instead of read/write_binary
Expand Down
10 changes: 8 additions & 2 deletions be/src/vec/data_types/data_type_variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ int64_t DataTypeVariant::get_uncompressed_serialized_bytes(const IColumn& column
}
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -113,6 +114,7 @@ char* DataTypeVariant::serialize(const IColumn& column, char* buf, int be_exec_v
++num_of_columns;
PColumnMeta column_meta_pb;
column_meta_pb.set_name(entry->path.get_path());
entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not used here*/);
type->to_pb_column_meta(&column_meta_pb);
std::string meta_binary;
column_meta_pb.SerializeToString(&meta_binary);
Expand Down Expand Up @@ -173,11 +175,15 @@ const char* DataTypeVariant::deserialize(const char* buf, MutableColumnPtr* colu
MutableColumnPtr sub_column = type->create_column();
buf = type->deserialize(buf, &sub_column, be_exec_version);

// add subcolumn to column_object
PathInData key;
if (!column_meta_pb.name().empty()) {
if (column_meta_pb.has_column_path()) {
// init from path pb
key.from_protobuf(column_meta_pb.column_path());
} else if (!column_meta_pb.name().empty()) {
// init from name for compatible
key = PathInData {column_meta_pb.name()};
}
// add subcolumn to column_object
column_object->add_sub_column(key, std::move(sub_column), type);
}
size_t num_rows = 0;
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/data_types/serde/data_type_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ void DataTypeSerDe::convert_variant_map_to_rapidjson(
continue;
}
rapidjson::Value key;
key.SetString(item.first.data(), cast_set<rapidjson::SizeType>(item.first.size()));
key.SetString(item.first.get_path().data(),
cast_set<rapidjson::SizeType>(item.first.get_path().size()));
rapidjson::Value val;
convert_field_to_rapidjson(item.second, val, allocator);
if (val.IsNull() && item.first.empty()) {
Expand Down
8 changes: 8 additions & 0 deletions be/src/vec/json/json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <vector>

#include "runtime/primitive_type.h"
#include "util/jsonb_writer.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
Expand Down Expand Up @@ -124,6 +125,13 @@ enum class ExtractType {
struct ParseConfig {
bool enable_flatten_nested = false;
};
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
/// Invariant: paths and values are parallel arrays — values[i] is the
/// field parsed at paths[i], so both vectors have the same length.
struct ParseResult {
// Extracted JSON paths, one per leaf value found in the document.
std::vector<PathInData> paths;
// Parsed leaf values, index-aligned with `paths`.
std::vector<Field> values;
};
template <typename ParserImpl>
class JSONDataParser {
public:
Expand Down
19 changes: 0 additions & 19 deletions be/src/vec/json/path_in_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@
#include <vector>

#include "gen_cpp/segment_v2.pb.h"
#include "vec/columns/column.h"
#include "vec/common/uint128.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"

namespace doris::vectorized {

Expand Down Expand Up @@ -129,13 +125,6 @@ class PathInDataBuilder {
size_t current_anonymous_array_level = 0;
};
using PathsInData = std::vector<PathInData>;
/// Result of parsing of a document.
/// Contains all paths extracted from document
/// and values which are related to them.
struct ParseResult {
std::vector<PathInData> paths;
std::vector<Field> values;
};

struct PathInDataRef {
const PathInData* ref;
Expand All @@ -148,12 +137,4 @@ struct PathInDataRef {
bool operator==(const PathInDataRef& other) const { return *this->ref == *other.ref; }
};

struct PathWithColumnAndType {
PathInData path;
ColumnPtr column;
DataTypePtr type;
};

using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;

} // namespace doris::vectorized
19 changes: 10 additions & 9 deletions be/test/vec/columns/column_object_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include "vec/columns/column_variant.h"
#include "vec/columns/common_column_test.h"
#include "vec/json/path_in_data.h"

namespace doris::vectorized {

Expand Down Expand Up @@ -196,11 +197,11 @@ TEST_F(ColumnObjectTest, test_insert_indices_from) {

Field result1;
dst_column->get(0, result1);
EXPECT_EQ(result1.get<VariantMap>().at("").get<Int64>(), 123);
EXPECT_EQ(result1.get<VariantMap>().at({}).get<Int64>(), 123);

Field result2;
dst_column->get(1, result2);
EXPECT_EQ(result2.get<VariantMap>().at("").get<Int64>(), 456);
EXPECT_EQ(result2.get<VariantMap>().at({}).get<Int64>(), 456);
}

// Test case 2: Insert from scalar variant source to non-empty destination of same type
Expand Down Expand Up @@ -237,9 +238,9 @@ TEST_F(ColumnObjectTest, test_insert_indices_from) {
dst_column->get(1, result2);
dst_column->get(2, result3);

EXPECT_EQ(result1.get<VariantMap>().at("").get<Int64>(), 789);
EXPECT_EQ(result2.get<VariantMap>().at("").get<Int64>(), 456);
EXPECT_EQ(result3.get<VariantMap>().at("").get<Int64>(), 123);
EXPECT_EQ(result1.get<VariantMap>().at({}).get<Int64>(), 789);
EXPECT_EQ(result2.get<VariantMap>().at({}).get<Int64>(), 456);
EXPECT_EQ(result3.get<VariantMap>().at({}).get<Int64>(), 123);
}

// Test case 3: Insert from non-scalar or different type source (fallback to try_insert)
Expand All @@ -250,13 +251,13 @@ TEST_F(ColumnObjectTest, test_insert_indices_from) {
// Create a map with {"a": 123}
Field field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
auto& map1 = field_map.get<VariantMap&>();
map1["a"] = Field::create_field<TYPE_INT>(123);
map1[PathInData("a")] = Field::create_field<TYPE_INT>(123);
src_column->try_insert(field_map);

// Create another map with {"b": "hello"}
field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
auto& map2 = field_map.get<VariantMap&>();
map2["b"] = Field::create_field<TYPE_STRING>(String("hello"));
map2[PathInData("b")] = Field::create_field<TYPE_STRING>(String("hello"));
src_column->try_insert(field_map);

src_column->finalize();
Expand Down Expand Up @@ -285,8 +286,8 @@ TEST_F(ColumnObjectTest, test_insert_indices_from) {
const auto& result1_map = result1.get<const VariantMap&>();
const auto& result2_map = result2.get<const VariantMap&>();

EXPECT_EQ(result1_map.at("b").get<const String&>(), "hello");
EXPECT_EQ(result2_map.at("a").get<Int64>(), 123);
EXPECT_EQ(result1_map.at(PathInData("b")).get<const String&>(), "hello");
EXPECT_EQ(result2_map.at(PathInData("a")).get<Int64>(), 123);
}
}

Expand Down
1 change: 1 addition & 0 deletions gensrc/proto/data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ message PColumnMeta {
optional bool result_is_nullable = 6;
optional string function_name = 7;
optional int32 be_exec_version = 8;
optional segment_v2.ColumnPathInfo column_path = 9;
}

message PBlock {
Expand Down
24 changes: 18 additions & 6 deletions regression-test/data/variant_p0/column_name.out
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,25 @@ UPPER CASE lower case
\N
\N
\N
""
""


1234566
16
8888888
"UPPER CASE"
"dkdkdkdkdkd"
"ooaoaaaaaaa"
"xmxxmmmmmm"
UPPER CASE
dkdkdkdkdkd
ooaoaaaaaaa
xmxxmmmmmm

-- !sql_cnt_1 --
128

-- !sql_cnt_2 --
128

-- !sql_cnt_3 --
128

-- !sql_cnt_4 --
128

15 changes: 13 additions & 2 deletions regression-test/suites/variant_p0/column_name.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
properties("replication_num" = "1", "disable_auto_compaction" = "false");
"""

sql """insert into ${table_name} values (1, '{"中文" : "中文", "\\\u4E2C\\\u6587": "unicode"}')"""
Expand Down Expand Up @@ -61,7 +61,18 @@ suite("regression_test_variant_column_name", "variant_type"){
sql """insert into var_column_name values (7, '{"": 1234566}')"""
sql """insert into var_column_name values (7, '{"": 8888888}')"""

qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as string)"
qt_sql "select cast(Tags[''] as text) from var_column_name order by cast(Tags[''] as string)"

// name with `.`
sql "truncate table var_column_name"
sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
for (int i = 0; i < 7; i++) {
sql """insert into var_column_name select * from var_column_name"""
}
qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"

try {
sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": "lower case"}')"""
Expand Down
Loading