Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression: refactor grouping function implementation #7583

Merged
merged 8 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion contrib/tipb
71 changes: 56 additions & 15 deletions dbms/src/Functions/FunctionsGrouping.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
return (num & (num - 1)) == 0;
}

using ResultType = UInt8;
using ResultType = UInt64;

class FunctionGrouping : public IFunctionBase
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
, public IExecutableFunction
Expand All @@ -62,22 +62,34 @@ class FunctionGrouping : public IFunctionBase
throw Exception("Grouping function decodes meta data fail");

mode = static_cast<tipb::GroupingMode>(meta.mode());
size_t num = meta.grouping_marks_size();
size_t num_grouping_mark = meta.grouping_marks_size();

if (num <= 0)
if (num_grouping_mark <= 0)
throw Exception("number of grouping_ids should be greater than 0");

if (mode == tipb::GroupingMode::ModeBitAnd || mode == tipb::GroupingMode::ModeNumericCmp)
{
assert(meta.grouping_marks_size() == 1);
if (mode == tipb::GroupingMode::ModeBitAnd)
assert(isPowerOf2(meta.grouping_marks()[0]));
meta_grouping_id = meta.grouping_marks()[0];
for (const auto & one_grouping_mark : meta.grouping_marks())
{
assert(one_grouping_mark.grouping_nums_size() == 1);
if (mode == tipb::GroupingMode::ModeBitAnd)
assert(isPowerOf2(one_grouping_mark.grouping_nums()[0]));
// should store the meta_grouping_id.
meta_grouping_ids.emplace_back(one_grouping_mark.grouping_nums()[0]);
}
}
else
{
for (size_t i = 0; i < num; ++i)
meta_grouping_marks.insert(meta.grouping_marks()[i]);
for (const auto & one_grouping_mark : meta.grouping_marks())
{
// for every dimension, construct a set.
std::set<UInt64> grouping_ids;
for (auto id : one_grouping_mark.grouping_nums())
{
grouping_ids.insert(id);
}
meta_grouping_marks.emplace_back(grouping_ids);
}
}
}

Expand Down Expand Up @@ -146,31 +158,58 @@ class FunctionGrouping : public IFunctionBase

ResultType groupingImplModeAndBit(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
return (grouping_id & meta_grouping_id) != 0;
UInt64 res = 0;
for (auto one_grouping_id : meta_grouping_ids)
{
res <<= 1;
if ((grouping_id & one_grouping_id) <= 0)
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
// col is not needed, meaning it is filled with null and grouped. = 1
res += 1;
}
return res;
}

ResultType groupingImplModeNumericCmp(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
return grouping_id > meta_grouping_id;
UInt64 res = 0;
for (auto one_grouping_id : meta_grouping_ids)
{
res <<= 1;
if (grouping_id <= one_grouping_id)
// col is not needed, meaning it is filled with null and grouped. = 1
res += 1;
}
return res;
}

ResultType groupingImplModeNumericSet(UInt64 grouping_id) const
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
{
auto iter = meta_grouping_marks.find(grouping_id);
return iter == meta_grouping_marks.end();
UInt64 res = 0;
for (auto one_grouping_mark : meta_grouping_marks)
{
res <<= 1;
auto iter = one_grouping_mark.find(grouping_id);
if (iter == one_grouping_mark.end())
// In num-set mode, the grouping marks store the grouping sets (GIDs) in which the col is needed.
// When the grouping id cannot be found in the set, this col is not needed, meaning it is filled with null and grouped. = 1
res += 1;
}
return res;
}

private:
DataTypes argument_types;
DataTypePtr return_type;

tipb::GroupingMode mode;
UInt64 meta_grouping_id = 0;
// one more dimension for multi grouping function args like: grouping(x,y,z...)
std::vector<UInt64> meta_grouping_ids;

// In the grouping function, the number of rolled-up columns is usually very small,
// so it's appropriate to use std::set, as it is faster than unordered_set for
// a small number of elements.
std::set<UInt64> meta_grouping_marks = {};
// one more dimension for multi grouping function args like: grouping(x,y,z...)
std::vector<std::set<UInt64>> meta_grouping_marks = {};
};

class FunctionBuilderGrouping : public IFunctionBuilder
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -191,6 +230,8 @@ class FunctionBuilderGrouping : public IFunctionBuilder

String getName() const override { return name; }
bool useDefaultImplementationForNulls() const override { return true; }
// At the frontend, the grouping function can receive at most 64 parameters.
// At the backend, the grouping function has been rewritten to receive only the gid, together with meta.
size_t getNumberOfArguments() const override { return 1; }
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
void setExpr(const tipb::Expr & expr_)
{
Expand Down
Loading