Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,8 @@

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -216,23 +214,27 @@ private static boolean needTransform(LogicalAggregate<Plan> agg, List<Alias> ali
if (agg.getSourceRepeat().isPresent()) {
return false;
}
Set<Expression> distinctFunc = new HashSet<>();
if (agg.distinctFuncNum() < 2 || agg.getDistinctArguments().size() < 2) {
return false;
}
boolean distinctMultiColumns = false;
boolean hasNotSupportMultiDistinctFunc = false;
for (NamedExpression namedExpression : agg.getOutputExpressions()) {
if (!(namedExpression instanceof Alias) || !(namedExpression.child(0) instanceof AggregateFunction)) {
continue;
}
AggregateFunction aggFunc = (AggregateFunction) namedExpression.child(0);
if (aggFunc instanceof SupportMultiDistinct && aggFunc.isDistinct()) {
if (aggFunc.isDistinct()) {
hasNotSupportMultiDistinctFunc = hasNotSupportMultiDistinctFunc
|| !(aggFunc instanceof SupportMultiDistinct);
aliases.add((Alias) namedExpression);
distinctFunc.add(aggFunc);
distinctMultiColumns = distinctMultiColumns || isDistinctMultiColumns(aggFunc);
} else {
otherAggFuncs.add((Alias) namedExpression);
}
}
if (distinctFunc.size() <= 1) {
return false;
if (hasNotSupportMultiDistinctFunc) {
return true;
}
// when this aggregate is not distinctMultiColumns, and group by expressions is not empty
// e.g. sql1: select count(distinct a), count(distinct b) from t1 group by c;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,18 @@ default boolean isDistinct() {
return getOutputExpressions().stream().allMatch(e -> e instanceof Slot)
&& getGroupByExpressions().stream().allMatch(e -> e instanceof Slot);
}

/**
 * Counts the aggregate functions that are marked as distinct.
 *
 * @return how many of the functions yielded by {@code getAggregateFunctions()}
 *         report {@code isDistinct()} as true
 */
default int distinctFuncNum() {
    int distinctCount = 0;
    for (AggregateFunction function : getAggregateFunctions()) {
        // Each distinct function contributes exactly one to the tally.
        distinctCount += function.isDistinct() ? 1 : 0;
    }
    return distinctCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -501,3 +501,33 @@ PhysicalResultSink
-- !null_hash --
1 \N 0 0.0

-- !array_agg_nogby --
[2] [1]

-- !array_agg_gby --
[2] [1]
[2] [1]

-- !array_agg_and_other --
[2] 2

-- !not_split_cte_when_same_col --
3 2 1.5

-- !not_split_cte_when_same_col_shape --
PhysicalResultSink
--hashAgg[DISTINCT_LOCAL]
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalOlapScan[test_distinct_multi]

-- !not_split_cte_when_same_col_gby --
3 2 1.5

-- !not_split_cte_when_same_col_gby__shape --
PhysicalResultSink
--hashAgg[DISTINCT_LOCAL]
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalOlapScan[test_distinct_multi]

3 changes: 3 additions & 0 deletions regression-test/data/nereids_syntax_p0/analyze_agg.out
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --

-- !multi_agg_distinct_func --
0 \N

-- !test_sum0 --
0 0
0 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,14 @@ suite("distinct_split") {
sql "create table test_distinct_multi_null_hash(a int, b int, c int, d varchar(10), e date) distributed by hash(a) properties('replication_num'='1');"
sql "insert into test_distinct_multi_null_hash values(1,null,null,null,'2024-12-08');"
qt_null_hash "SELECT a, b, count(distinct c,e), count(distinct concat(d,e))/count(distinct e) FROM test_distinct_multi_null_hash where e = '2024-12-08' GROUP BY a, b;"

// test aggregate functions that do not support multi_distinct
sql "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi"
qt_array_agg_nogby "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi where a=1 and b=2"
qt_array_agg_gby "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi where a=1 and b=2 group by c"
qt_array_agg_and_other "select array_agg(distinct b), count(distinct a) from test_distinct_multi where b=2"
qt_not_split_cte_when_same_col "select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi"
qt_not_split_cte_when_same_col_shape "explain shape plan select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi"
order_qt_not_split_cte_when_same_col_gby "select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi group by b"
qt_not_split_cte_when_same_col_gby__shape "explain shape plan select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi group by b"
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ suite("analyze_agg") {
tt2.c;
"""

test {
sql "select count(distinct t2.b), variance(distinct t2.c) from t2"
exception "variance(DISTINCT c#2) can't support multi distinct."
}
qt_multi_agg_distinct_func "select count(distinct t2.b), variance(distinct t2.c) from t2"

// should not bind g /g in group by again, otherwise will throw exception
sql "select g / g as nu, sum(c) from t2 group by nu"
Expand Down
Loading