Skip to content

Commit

Permalink
[YQL-17637] Revive merging PERCENTILE/MEDIAN aggregation traits for s…
Browse files Browse the repository at this point in the history
…ame column
  • Loading branch information
nepal committed Feb 9, 2024
1 parent 3215892 commit 476ffc3
Show file tree
Hide file tree
Showing 16 changed files with 96 additions and 49 deletions.
5 changes: 4 additions & 1 deletion ydb/library/yql/mount/lib/yql/aggregate.yql
Original file line number Diff line number Diff line change
Expand Up @@ -366,11 +366,14 @@
(let save (lambda '(state) (Apply (Udf 'Stat.TDigest_Serialize) state)))
(let load (lambda '(state) (Apply (Udf 'Stat.TDigest_Deserialize) state)))
(let merge (lambda '(one two) (Apply (Udf 'Stat.TDigest_Merge) one two)))
(let finish (lambda '(state) (MatchType n
(let finish_with_param (lambda '(state n) (MatchType n
'Tuple (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n))))
'Struct (lambda '() (StaticMap n (lambda '(n) (Apply get_convert_percentile state n))))
'List (lambda '() (OrderedMap n (lambda '(n) (Apply get_convert_percentile state n))))
(lambda '() (Apply get_convert_percentile state n)))))
(let finish (lambda '(state) (MatchType n
'Tuple (lambda '() (StaticMap n (lambda '(n) (Apply finish_with_param state n))))
(lambda '() (Apply finish_with_param state n)))))
(return (AggregationTraits (ListItemType list_type) init update save load merge finish (Null)))
))))

Expand Down
18 changes: 18 additions & 0 deletions ydb/library/yql/sql/v1/aggregation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,19 @@ class TPercentileFactory final : public TAggregationFactory {
{}

private:
// Key under which ISource::BuildAggregation groups aggregations for joining.
// Returns the Column pointer, which is nullptr by default and is only assigned
// in InitAggr when isFactory is false (from exprs.front()->GetColumnName()).
const TString* GetGenericKey() const final {
return Column;
}

// Absorbs the percentiles collected by another TPercentileFactory into this one.
// Preconditions (checked with YQL_ENSURE): aggr really is a TPercentileFactory,
// both sides have a non-null Column naming the same column, and both use the
// same AggMode. On return the donor's Percentiles container is empty.
void Join(IAggregation* aggr) final {
    TPercentileFactory* const percentile = dynamic_cast<TPercentileFactory*>(aggr);
    YQL_ENSURE(percentile);
    YQL_ENSURE(Column && percentile->Column && *Column == *percentile->Column);
    YQL_ENSURE(AggMode == percentile->AggMode);

    // Copy every (name, node) entry across, then empty the donor so its
    // percentiles are not kept in two places.
    auto& donated = percentile->Percentiles;
    for (const auto& entry : donated) {
        Percentiles.insert(entry);
    }
    donated.clear();
}

bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) final {
ui32 adjustArgsCount = isFactory ? 0 : 1;
if (exprs.size() < 0 + adjustArgsCount || exprs.size() > 1 + adjustArgsCount) {
Expand All @@ -722,6 +735,10 @@ class TPercentileFactory final : public TAggregationFactory {
return false;
}

if (!isFactory) {
Column = exprs.front()->GetColumnName();
}

if (!TAggregationFactory::InitAggr(ctx, isFactory, src, node, isFactory ? TVector<TNodePtr>() : TVector<TNodePtr>(1, exprs.front())))
return false;

Expand Down Expand Up @@ -811,6 +828,7 @@ class TPercentileFactory final : public TAggregationFactory {
TSourcePtr FakeSource;
std::multimap<TString, TNodePtr> Percentiles;
TNodePtr FactoryPercentile;
const TString* Column = nullptr;
};

TAggregationPtr BuildPercentileFactoryAggregation(TPosition pos, const TString& name, const TString& factory, EAggregateMode aggMode) {
Expand Down
8 changes: 8 additions & 0 deletions ydb/library/yql/sql/v1/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,14 @@ void IAggregation::DoUpdateState() const {
State.Set(ENodeState::OverWindow, AggMode == EAggregateMode::OverWindow);
}

// Base implementation: reports no generic key by returning nullptr.
// ISource::BuildAggregation only groups aggregations whose key is non-null.
const TString* IAggregation::GetGenericKey() const {
return nullptr;
}

// Base implementation: unconditionally trips YQL_ENSURE, ignoring its argument.
// ISource::BuildAggregation calls Join on the first aggregation of every group
// that shares a non-null generic key, so a subclass returning such a key is
// presumably expected to override this as well.
void IAggregation::Join(IAggregation*) {
YQL_ENSURE(false, "Should not be called");
}

// Returns the Name member by const reference.
const TString& IAggregation::GetName() const {
return Name;
}
Expand Down
5 changes: 5 additions & 0 deletions ydb/library/yql/sql/v1/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,8 @@ namespace NSQLTranslationV1 {

void DoUpdateState() const override;

// Key by which ISource::BuildAggregation groups aggregations before calling Join;
// the base implementation in node.cpp returns nullptr (no key).
virtual const TString* GetGenericKey() const;

virtual bool InitAggr(TContext& ctx, bool isFactory, ISource* src, TAstListNode& node, const TVector<TNodePtr>& exprs) = 0;

virtual std::pair<TNodePtr, bool> AggregationTraits(const TNodePtr& type, bool overState, bool many, bool allowAggApply, TContext& ctx) const;
Expand All @@ -812,6 +814,9 @@ namespace NSQLTranslationV1 {

EAggregateMode GetAggregationMode() const;
void MarkKeyColumnAsGenerated();

// Asks this aggregation to absorb aggr. The base implementation in node.cpp
// always fails with YQL_ENSURE(false, "Should not be called");
// TPercentileFactory overrides it to take over the other factory's percentiles.
virtual void Join(IAggregation* aggr);

private:
virtual TNodePtr GetApply(const TNodePtr& type, bool many, bool allowAggApply, TContext& ctx) const = 0;

Expand Down
13 changes: 13 additions & 0 deletions ydb/library/yql/sql/v1/source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,19 @@ std::pair<TNodePtr, bool> ISource::BuildAggregation(const TString& label, TConte
keysTuple = L(keysTuple, BuildQuotedAtom(Pos, key));
}

// Bucket the aggregations that expose a generic key by (IsDistinct(), key).
// Aggregations whose GetGenericKey() returns nullptr are left out of the map.
std::map<std::pair<bool, TString>, std::vector<IAggregation*>> genericAggrs;
for (const auto& aggr: Aggregations) {
if (const auto key = aggr->GetGenericKey()) {
genericAggrs[{aggr->IsDistinct(), *key}].emplace_back(aggr.Get());
}
}

// Within each bucket, join every later aggregation into the first one.
// The index starts at 1 because element 0 is the join target itself.
for (const auto& aggr : genericAggrs) {
for (size_t i = 1U; i < aggr.second.size(); ++i) {
aggr.second.front()->Join(aggr.second[i]);
}
}

const auto listType = Y("TypeOf", label);
auto aggrArgs = Y();
const bool overState = GroupBySuffix == "CombineState" || GroupBySuffix == "MergeState" ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,9 @@
],
"test.test[aggregate-percentiles_ungrouped--Debug]": [
{
"checksum": "a7bb996a8773589df25f781ae2e3f6ba",
"size": 4426,
"uri": "https://{canondata_backend}/1600758/32cfdeb8c6377a2e7e62c6c4adbb95f25af7669b/resource.tar.gz#test.test_aggregate-percentiles_ungrouped--Debug_/opt.yql_patched"
"checksum": "786ee62a4079a5b6a83a0338a37c3929",
"size": 4490,
"uri": "https://{canondata_backend}/1917492/fbb65055f5005e96079d6101d279f7d80a51b98c/resource.tar.gz#test.test_aggregate-percentiles_ungrouped--Debug_/opt.yql_patched"
}
],
"test.test[aggregate-percentiles_ungrouped--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,9 +533,9 @@
],
"test.test[aggregate-percentiles_grouped--Debug]": [
{
"checksum": "f467749887af6dafe25f7681d5b1b1b1",
"size": 5347,
"uri": "https://{canondata_backend}/1600758/aad142702907f13e911494c1a7b312bad34f692a/resource.tar.gz#test.test_aggregate-percentiles_grouped--Debug_/opt.yql_patched"
"checksum": "c0ec3728e41857e889165c1f81802a6b",
"size": 5446,
"uri": "https://{canondata_backend}/1773845/f3e4c472dc37081782e19cd965bd65655fb94de9/resource.tar.gz#test.test_aggregate-percentiles_grouped--Debug_/opt.yql_patched"
}
],
"test.test[aggregate-percentiles_grouped--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1700,9 +1700,9 @@
],
"test.test[optimizers-unused_columns_group_one_of_multi--Debug]": [
{
"checksum": "c7ed6969d5c04f9e104d3ba0406d5b6f",
"size": 4156,
"uri": "https://{canondata_backend}/1599023/9fb10775fd57dc9adafaafe2a658f6533a20dc46/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Debug_/opt.yql_patched"
"checksum": "677763ab94d3b9f0398c41e73848312a",
"size": 4236,
"uri": "https://{canondata_backend}/1937429/2c15f4d1df999da133c8e0a23d0e5d9af91b079a/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Debug_/opt.yql_patched"
}
],
"test.test[optimizers-unused_columns_group_one_of_multi--Plan]": [
Expand Down
6 changes: 3 additions & 3 deletions ydb/library/yql/tests/sql/dq_file/part2/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,9 @@
],
"test.test[aggregate-percentiles_containers--Debug]": [
{
"checksum": "adba2e9a60ae2f1903b0b4f7d334cfe7",
"size": 8655,
"uri": "https://{canondata_backend}/1784117/562608e5eb2c9a9b9076bc8caa84f8c27bb8d804/resource.tar.gz#test.test_aggregate-percentiles_containers--Debug_/opt.yql_patched"
"checksum": "ff4ccf5769f32d89a03d38e1338a3da4",
"size": 7429,
"uri": "https://{canondata_backend}/1814674/791ad1ff4b533bb1dc92a434e57a1cfb0e970e3d/resource.tar.gz#test.test_aggregate-percentiles_containers--Debug_/opt.yql_patched"
}
],
"test.test[aggregate-percentiles_containers--Plan]": [
Expand Down
6 changes: 3 additions & 3 deletions ydb/library/yql/tests/sql/dq_file/part5/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,9 @@
],
"test.test[aggregate-percentile_interval-default.txt-Debug]": [
{
"checksum": "f27012231945367819b310461cb18de9",
"size": 8652,
"uri": "https://{canondata_backend}/1942100/090fa9e99dfe7f43e6470439372ea4a84a495992/resource.tar.gz#test.test_aggregate-percentile_interval-default.txt-Debug_/opt.yql_patched"
"checksum": "6801899fea208273de4cd7f5e047def5",
"size": 8607,
"uri": "https://{canondata_backend}/1942525/a8c7a526c65018b574c61b860f700c144215b503/resource.tar.gz#test.test_aggregate-percentile_interval-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[aggregate-percentile_interval-default.txt-Plan]": [
Expand Down
36 changes: 18 additions & 18 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -2535,23 +2535,23 @@
],
"test_sql2yql.test[aggregate-percentile_interval]": [
{
"checksum": "5b4080f783d6030a481ed74ab73cb1db",
"size": 7783,
"uri": "https://{canondata_backend}/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_aggregate-percentile_interval_/sql.yql"
"checksum": "67c37e0b3c04639249682221e4f7d88e",
"size": 7664,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_aggregate-percentile_interval_/sql.yql"
}
],
"test_sql2yql.test[aggregate-percentiles_containers]": [
{
"checksum": "6769c75a2308dcc142c7ff777b855721",
"size": 6031,
"uri": "https://{canondata_backend}/1784117/5fac73a22f194fa5439186fe33ef2e6bf62271e1/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_containers_/sql.yql"
"checksum": "77473f9f1a6278e8e39c8e276268ec2e",
"size": 5359,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_containers_/sql.yql"
}
],
"test_sql2yql.test[aggregate-percentiles_grouped]": [
{
"checksum": "ebde86f573b7ae4e2377b469d7808202",
"size": 2676,
"uri": "https://{canondata_backend}/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_grouped_/sql.yql"
"checksum": "84f8e6ac21c327f0e2d149c3c2d34063",
"size": 2568,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_grouped_/sql.yql"
}
],
"test_sql2yql.test[aggregate-percentiles_grouped_expr]": [
Expand All @@ -2563,9 +2563,9 @@
],
"test_sql2yql.test[aggregate-percentiles_ungrouped]": [
{
"checksum": "9307068640e3157abd738b9552104a74",
"size": 2101,
"uri": "https://{canondata_backend}/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_ungrouped_/sql.yql"
"checksum": "3732a0e1134e089922c262aede0a3a09",
"size": 1993,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_aggregate-percentiles_ungrouped_/sql.yql"
}
],
"test_sql2yql.test[aggregate-rollup_with_dict]": [
Expand Down Expand Up @@ -9976,16 +9976,16 @@
],
"test_sql2yql.test[optimizers-unused_columns_group]": [
{
"checksum": "047fa7a1ddc9b481e8f75b3009bc7747",
"size": 4020,
"uri": "https://{canondata_backend}/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_optimizers-unused_columns_group_/sql.yql"
"checksum": "23f2f151abde1b6bc069afcdaa98fb99",
"size": 3914,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_optimizers-unused_columns_group_/sql.yql"
}
],
"test_sql2yql.test[optimizers-unused_columns_group_one_of_multi]": [
{
"checksum": "7af97611d638053e022b699c8a343f88",
"size": 3961,
"uri": "https://{canondata_backend}/1925821/ec9ae1e25388a76d5f7a0df27259196bc4217c7e/resource.tar.gz#test_sql2yql.test_optimizers-unused_columns_group_one_of_multi_/sql.yql"
"checksum": "eefd0a9586e332132c7ca389e60c99d1",
"size": 3855,
"uri": "https://{canondata_backend}/1784826/6e24b46ab3ce91844f7fd0a7e573c44927321db7/resource.tar.gz#test_sql2yql.test_optimizers-unused_columns_group_one_of_multi_/sql.yql"
}
],
"test_sql2yql.test[optimizers-unused_columns_window]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@
],
"test.test[aggregate-percentiles_ungrouped--Debug]": [
{
"checksum": "637965a8ee18ca0cbae0f3aa5c40671a",
"size": 4556,
"uri": "https://{canondata_backend}/1924537/6033fa62c3628f7f8f85fe0b254eb896d2d9c2ce/resource.tar.gz#test.test_aggregate-percentiles_ungrouped--Debug_/opt.yql"
"checksum": "f58b940759e978bd421dd88ea5a93a95",
"size": 4638,
"uri": "https://{canondata_backend}/1925842/e26b767307e059d5871ba5182e419bc098be6e6b/resource.tar.gz#test.test_aggregate-percentiles_ungrouped--Debug_/opt.yql"
}
],
"test.test[aggregate-percentiles_ungrouped--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -487,9 +487,9 @@
],
"test.test[aggregate-percentiles_grouped--Debug]": [
{
"checksum": "7e9b2491ce46422377d9bc3944363901",
"size": 7660,
"uri": "https://{canondata_backend}/1923547/61458c9c64b8429a1ff4c80acb29f295ac160173/resource.tar.gz#test.test_aggregate-percentiles_grouped--Debug_/opt.yql"
"checksum": "adf837da9aba1da094a04ebb64bdf170",
"size": 7723,
"uri": "https://{canondata_backend}/1942415/5fc845db8c7d9a5e3223d35f17ada1aad250be16/resource.tar.gz#test.test_aggregate-percentiles_grouped--Debug_/opt.yql"
}
],
"test.test[aggregate-percentiles_grouped--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1471,9 +1471,9 @@
],
"test.test[optimizers-unused_columns_group_one_of_multi--Debug]": [
{
"checksum": "aeaf1bb6bdf70af4653faa667e4a136c",
"size": 4635,
"uri": "https://{canondata_backend}/1937027/16b7289b1b8f5fdff728155d836fa2b238949b2d/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Debug_/opt.yql"
"checksum": "3a01806a12143010452fa4804ea9668d",
"size": 4715,
"uri": "https://{canondata_backend}/1936947/121cb7bfa79682ba0826d97605a7c8274a777448/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Debug_/opt.yql"
}
],
"test.test[optimizers-unused_columns_group_one_of_multi--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,9 @@
],
"test.test[aggregate-percentiles_containers--Debug]": [
{
"checksum": "54dd186de9e523887f8164e6759219b7",
"size": 9490,
"uri": "https://{canondata_backend}/1871182/6486721d05dde2b6349875a5606e975edca5ff38/resource.tar.gz#test.test_aggregate-percentiles_containers--Debug_/opt.yql"
"checksum": "2d6826ce370619b59fb90a23104e7b4c",
"size": 9272,
"uri": "https://{canondata_backend}/1814674/e05cebb4f2aa41ec0dfd7bbae9cb39981a421150/resource.tar.gz#test.test_aggregate-percentiles_containers--Debug_/opt.yql"
}
],
"test.test[aggregate-percentiles_containers--Plan]": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,9 @@
],
"test.test[aggregate-percentile_interval-default.txt-Debug]": [
{
"checksum": "2e7ded8794e770bf85fcbd1d32aa4159",
"size": 8809,
"uri": "https://{canondata_backend}/1600758/4dc73fd352eb8e7cdc1c8e9c1d7bbf928a55551d/resource.tar.gz#test.test_aggregate-percentile_interval-default.txt-Debug_/opt.yql"
"checksum": "cb7d7f27882936285867ca78f3c722cc",
"size": 8824,
"uri": "https://{canondata_backend}/1942525/85e1a294d89f4a2dd75bf364b4da5c1dfb0d7745/resource.tar.gz#test.test_aggregate-percentile_interval-default.txt-Debug_/opt.yql"
}
],
"test.test[aggregate-percentile_interval-default.txt-Plan]": [
Expand Down

0 comments on commit 476ffc3

Please sign in to comment.