Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression: refactor grouping function implementation #7583

Merged
merged 8 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 56 additions & 15 deletions dbms/src/Functions/FunctionsGrouping.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
return (num & (num - 1)) == 0;
}

using ResultType = UInt8;
using ResultType = UInt64;

class FunctionGrouping : public IFunctionBase
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
, public IExecutableFunction
Expand All @@ -62,22 +62,34 @@ class FunctionGrouping : public IFunctionBase
throw Exception("Grouping function decodes meta data fail");

mode = static_cast<tipb::GroupingMode>(meta.mode());
size_t num = meta.grouping_marks_size();
size_t num_grouping_mark = meta.grouping_marks_size();

if (num <= 0)
if (num_grouping_mark <= 0)
throw Exception("number of grouping_ids should be greater than 0");

if (mode == tipb::GroupingMode::ModeBitAnd || mode == tipb::GroupingMode::ModeNumericCmp)
{
assert(meta.grouping_marks_size() == 1);
if (mode == tipb::GroupingMode::ModeBitAnd)
assert(isPowerOf2(meta.grouping_marks()[0]));
meta_grouping_id = meta.grouping_marks()[0];
for (const auto & one_grouping_mark : meta.grouping_marks())
{
assert(one_grouping_mark.grouping_nums_size() == 1);
if (mode == tipb::GroupingMode::ModeBitAnd)
assert(isPowerOf2(one_grouping_mark.grouping_nums(0)));
// should store the meta_grouping_id
meta_grouping_ids.emplace_back(one_grouping_mark.grouping_nums(0));
}
}
else
{
for (size_t i = 0; i < num; ++i)
meta_grouping_marks.insert(meta.grouping_marks()[i]);
for (const auto & one_grouping_mark : meta.grouping_marks())
{
// for every dimension, construct a set.
std::set<UInt64> grouping_ids;
for (auto id : one_grouping_mark.grouping_nums())
{
grouping_ids.insert(id);
}
meta_grouping_marks.emplace_back(grouping_ids);
}
}
}

Expand Down Expand Up @@ -146,31 +158,58 @@ class FunctionGrouping : public IFunctionBase

// NOTE(review): this span is a review-page rendering of a diff hunk. Lines from
// both sides of the change are shown without +/- markers and are interleaved
// with review-UI text, so it is not compilable as written.
// The first `return` appears to be the earlier single-mask form: it yields
// whether grouping_id and meta_grouping_id share at least one set bit.
// The loop that follows is the multi-mask form: for every entry of
// meta_grouping_ids it shifts the accumulator left by one and then adds 1 when
// grouping_id and that entry share at least one set bit. The first entry
// therefore ends up in the most significant of the produced bits.
ResultType groupingImplModeAndBit(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
return (grouping_id & meta_grouping_id) != 0;
UInt64 final = 0;
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
for (auto one_grouping_id : meta_grouping_ids)
{
// Make room for this entry's bit.
final <<= 1;
if ((grouping_id & one_grouping_id) > 0)
// The column is not needed (it has been filled with null and grouped), so its bit is 1.
final += 1;
}
return final;
}

// NOTE(review): this span is a review-page rendering of a diff hunk. Lines from
// both sides of the change are shown without +/- markers and are interleaved
// with review-UI text, so it is not compilable as written.
// The first `return` appears to be the earlier single-value form: it yields
// whether grouping_id is strictly greater than meta_grouping_id.
// The loop that follows is the multi-value form: for every entry of
// meta_grouping_ids it shifts the accumulator left by one and then adds 1 when
// grouping_id is strictly greater than that entry. The first entry therefore
// ends up in the most significant of the produced bits.
ResultType groupingImplModeNumericCmp(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
return grouping_id > meta_grouping_id;
UInt64 final = 0;
for (auto one_grouping_id : meta_grouping_ids)
{
// Make room for this entry's bit.
final <<= 1;
if (grouping_id > one_grouping_id)
// The column is not needed (it has been filled with null and grouped), so its bit is 1.
final += 1;
}
return final;
}

// NOTE(review): this span is a review-page rendering of a diff hunk. Lines from
// both sides of the change are shown without +/- markers and are interleaved
// with review-UI text, so it is not compilable as written.
// The first two statements appear to be the earlier single-set form: they
// return true when grouping_id is NOT found in meta_grouping_marks.
// The loop that follows is the multi-set form: for every set in
// meta_grouping_marks it shifts the accumulator left by one and then adds 1
// when grouping_id IS found in that set. The first set therefore ends up in
// the most significant of the produced bits.
// NOTE(review): the two forms use opposite conditions (not-found vs. found);
// confirm callers expect the loop form's meaning.
ResultType groupingImplModeNumericSet(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
auto iter = meta_grouping_marks.find(grouping_id);
return iter == meta_grouping_marks.end();
UInt64 final = 0;
// NOTE(review): `auto` here takes each std::set by value, copying it on every
// iteration; binding with `const auto &` would avoid the copy.
for (auto one_grouping_mark : meta_grouping_marks)
{
// Make room for this set's bit.
final <<= 1;
auto iter = one_grouping_mark.find(grouping_id);
if (iter != one_grouping_mark.end())
// In num-set mode, each grouping mark stores the grouping IDs (GIDs) for which the column is not needed.
// Finding the grouping id in the set means this column is not needed, i.e. it has been filled with null and grouped, so its bit is 1.
final += 1;
}
return final;
}

private:
DataTypes argument_types;
DataTypePtr return_type;

tipb::GroupingMode mode;
UInt64 meta_grouping_id = 0;
// one more dimension for multi grouping function args like: grouping(x,y,z...)
std::vector<UInt64> meta_grouping_ids;

// In the grouping function, the number of rolled-up columns is usually very small,
// so it's appropriate to use std::set, as it is faster than unordered_set for
// a small number of elements.
std::set<UInt64> meta_grouping_marks = {};
// one more dimension for multi grouping function args like: grouping(x,y,z...)
std::vector<std::set<UInt64>> meta_grouping_marks = {};
};

class FunctionBuilderGrouping : public IFunctionBuilder
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -191,6 +230,8 @@ class FunctionBuilderGrouping : public IFunctionBuilder

String getName() const override { return name; }
bool useDefaultImplementationForNulls() const override { return true; }
// At the frontend, the grouping function can receive at most 64 parameters.
// At the backend, the grouping function has been rewritten to receive only the gid, accompanied by meta.
size_t getNumberOfArguments() const override { return 1; }
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
void setExpr(const tipb::Expr & expr_)
{
Expand Down
75 changes: 42 additions & 33 deletions dbms/src/Functions/tests/gtest_grouping.cpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add more cases for when we have multiple parameters

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ namespace tests
// Test-side description of the grouping metadata that buildFuncMetaData
// turns into a serialized grouping-function meta.
// NOTE(review): this span is a review-page rendering of a diff hunk, so both
// the scalar/single-set and the vector declarations of the same field names
// appear below; only one declaration per name can exist in the real file.
struct MetaData
{
// Grouping mode; buildFuncMetaData uses it to decide which field below is read.
tipb::GroupingMode mode{};
UInt64 grouping_id{};
std::set<UInt64> grouping_ids;
// Read in ModeBitAnd / ModeNumericCmp: each element becomes one grouping mark holding a single number.
std::vector<UInt64> grouping_id;
// Read in the remaining mode(s): each set becomes one grouping mark holding all of the set's numbers.
std::vector<std::set<UInt64>> grouping_ids;
};

FuncMetaData buildFuncMetaData(const MetaData & meta_data)
Expand All @@ -37,12 +37,22 @@ FuncMetaData buildFuncMetaData(const MetaData & meta_data)
grouping_meta.set_mode(meta_data.mode);
if (meta_data.mode == tipb::GroupingMode::ModeBitAnd || meta_data.mode == tipb::GroupingMode::ModeNumericCmp)
{
grouping_meta.add_grouping_marks(meta_data.grouping_id);
for (auto one_id : meta_data.grouping_id)
{
tipb::GroupingMark * grouping_mark = grouping_meta.add_grouping_marks();
grouping_mark->add_grouping_nums(one_id);
}
}
else
{
for (auto grouping_id : meta_data.grouping_ids)
grouping_meta.add_grouping_marks(grouping_id);
for (const auto & one_grouping_mark : meta_data.grouping_ids)
{
tipb::GroupingMark * grouping_mark = grouping_meta.add_grouping_marks();
for (auto one_num : one_grouping_mark)
{
grouping_mark->add_grouping_nums(one_num);
}
}
}

func_meta.val = grouping_meta.SerializeAsString();
Expand All @@ -59,7 +69,6 @@ TEST_F(TestGrouping, ModeBitAnd)
try
{
MetaData meta_data;
FuncMetaData func_meta;
meta_data.mode = tipb::GroupingMode::ModeBitAnd;

// const
Expand All @@ -71,10 +80,10 @@ try
size_t case_num = grouping_id.size();
for (size_t i = 0; i < case_num; ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createConstColumn<UInt8>(1, expect[i]),
createConstColumn<UInt64>(1, expect[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createConstColumn<UInt64>(1, grouping_id[i])},
Expand All @@ -95,10 +104,10 @@ try

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<UInt8>(expects[i]),
createColumn<UInt64>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<UInt64>(grouping_id)},
Expand All @@ -119,10 +128,10 @@ try

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<Nullable<UInt8>>(expects[i]),
createColumn<Nullable<UInt64>>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<Nullable<UInt64>>(grouping_id)},
Expand All @@ -148,10 +157,10 @@ try
size_t case_num = grouping_id.size();
for (size_t i = 0; i < case_num; ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createConstColumn<UInt8>(1, expect[i]),
createConstColumn<UInt64>(1, expect[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createConstColumn<UInt64>(1, grouping_id[i])},
Expand All @@ -175,10 +184,10 @@ try

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<UInt8>(expects[i]),
createColumn<UInt64>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<UInt64>(grouping_id)},
Expand All @@ -202,10 +211,10 @@ try

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_id = meta_grouping_id[i];
meta_data.grouping_id = std::vector<UInt64>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<Nullable<UInt8>>(expects[i]),
createColumn<Nullable<UInt64>>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<Nullable<UInt64>>(grouping_id)},
Expand All @@ -226,15 +235,15 @@ try
{
std::vector<UInt64> grouping_id{2, 2, 2, 2};
std::vector<std::set<UInt64>> meta_grouping_ids{{0, 2}, {2}, {3}, {1, 3}};
std::vector<UInt64> expect{0, 0, 1, 1};
std::vector<UInt64> expect{1, 1, 0, 0};

size_t case_num = grouping_id.size();
for (size_t i = 0; i < case_num; ++i)
{
meta_data.grouping_ids = meta_grouping_ids[i];
meta_data.grouping_ids = std::vector<std::set<UInt64>>{meta_grouping_ids[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createConstColumn<UInt8>(1, expect[i]),
createConstColumn<UInt64>(1, expect[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createConstColumn<UInt64>(1, grouping_id[i])},
Expand All @@ -248,17 +257,17 @@ try
std::vector<UInt64> grouping_id{1, 2, 3, 4};
std::vector<std::set<UInt64>> meta_grouping_id{{2}, {3}, {2, 3}, {1, 3}};
std::vector<std::vector<UInt64>> expects{
{1, 0, 1, 1},
{1, 1, 0, 1},
{1, 0, 0, 1},
{0, 1, 0, 1}};
{0, 1, 0, 0},
{0, 0, 1, 0},
{0, 1, 1, 0},
{1, 0, 1, 0}};

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_ids = meta_grouping_id[i];
meta_data.grouping_ids = std::vector<std::set<UInt64>>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<UInt8>(expects[i]),
createColumn<UInt64>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<UInt64>(grouping_id)},
Expand All @@ -272,17 +281,17 @@ try
std::vector<std::optional<UInt64>> grouping_id{1, 2, 3, 4, {}};
std::vector<std::set<UInt64>> meta_grouping_id{{2}, {3}, {2, 3}, {1, 3}};
std::vector<std::vector<std::optional<UInt64>>> expects{
{1, 0, 1, 1, {}},
{1, 1, 0, 1, {}},
{1, 0, 0, 1, {}},
{0, 1, 0, 1, {}}};
{0, 1, 0, 0, {}},
{0, 0, 1, 0, {}},
{0, 1, 1, 0, {}},
{1, 0, 1, 0, {}}};

for (size_t i = 0; i < expects.size(); ++i)
{
meta_data.grouping_ids = meta_grouping_id[i];
meta_data.grouping_ids = std::vector<std::set<UInt64>>{meta_grouping_id[i]};
FuncMetaData func_meta = buildFuncMetaData(meta_data);
ASSERT_COLUMN_EQ(
createColumn<Nullable<UInt8>>(expects[i]),
createColumn<Nullable<UInt64>>(expects[i]),
executeFunctionWithMetaData(
func_name,
std::vector<ColumnWithTypeAndName>{createColumn<Nullable<UInt64>>(grouping_id)},
Expand Down