diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java index 6adb3270b5b30c..437d3a186f6abb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java @@ -47,10 +47,8 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; /** @@ -216,23 +214,27 @@ private static boolean needTransform(LogicalAggregate agg, List ali if (agg.getSourceRepeat().isPresent()) { return false; } - Set distinctFunc = new HashSet<>(); + if (agg.distinctFuncNum() < 2 || agg.getDistinctArguments().size() < 2) { + return false; + } boolean distinctMultiColumns = false; + boolean hasNotSupportMultiDistinctFunc = false; for (NamedExpression namedExpression : agg.getOutputExpressions()) { if (!(namedExpression instanceof Alias) || !(namedExpression.child(0) instanceof AggregateFunction)) { continue; } AggregateFunction aggFunc = (AggregateFunction) namedExpression.child(0); - if (aggFunc instanceof SupportMultiDistinct && aggFunc.isDistinct()) { + if (aggFunc.isDistinct()) { + hasNotSupportMultiDistinctFunc = hasNotSupportMultiDistinctFunc + || !(aggFunc instanceof SupportMultiDistinct); aliases.add((Alias) namedExpression); - distinctFunc.add(aggFunc); distinctMultiColumns = distinctMultiColumns || isDistinctMultiColumns(aggFunc); } else { otherAggFuncs.add((Alias) namedExpression); } } - if (distinctFunc.size() <= 1) { - return false; + if (hasNotSupportMultiDistinctFunc) { + return true; } // when this aggregate is not distinctMultiColumns, and group by expressions is not empty // e.g. sql1: select count(distinct a), count(distinct b) from t1 group by c; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java index 7a283c740e5912..371dd43f91dc9a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java @@ -106,4 +106,18 @@ default boolean isDistinct() { return getOutputExpressions().stream().allMatch(e -> e instanceof Slot) && getGroupByExpressions().stream().allMatch(e -> e instanceof Slot); } + + /** + * distinctFuncNum + * @return number of distinct aggregate functions + */ + default int distinctFuncNum() { + int num = 0; + for (AggregateFunction aggFunc : getAggregateFunctions()) { + if (aggFunc.isDistinct()) { + ++num; + } + } + return num; + } } diff --git a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out index ede0fb5259cc62..ceed693e8ccb4b 100644 --- a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out +++ b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out @@ -501,3 +501,33 @@ PhysicalResultSink -- !null_hash -- 1 \N 0 0.0 +-- !array_agg_nogby -- +[2] [1] + +-- !array_agg_gby -- +[2] [1] +[2] [1] + +-- !array_agg_and_other -- +[2] 2 + +-- !not_split_cte_when_same_col -- +3 2 1.5 + +-- !not_split_cte_when_same_col_shape -- +PhysicalResultSink +--hashAgg[DISTINCT_LOCAL] +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalOlapScan[test_distinct_multi] + +-- !not_split_cte_when_same_col_gby -- +3 2 1.5 + +-- !not_split_cte_when_same_col_gby__shape -- +PhysicalResultSink +--hashAgg[DISTINCT_LOCAL] +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalOlapScan[test_distinct_multi] + diff --git a/regression-test/data/nereids_syntax_p0/analyze_agg.out b/regression-test/data/nereids_syntax_p0/analyze_agg.out index 8316c4aefe20c8..b0428c9e56bea8 100644 --- a/regression-test/data/nereids_syntax_p0/analyze_agg.out +++ b/regression-test/data/nereids_syntax_p0/analyze_agg.out @@ -1,6 +1,9 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- +-- !multi_agg_distinct_func -- +0 \N + -- !test_sum0 -- 0 0 0 3 diff --git a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy index 9efe6ce487a4f8..569c086112cc0f 100644 --- a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy +++ b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy @@ -213,4 +213,14 @@ suite("distinct_split") { sql "create table test_distinct_multi_null_hash(a int, b int, c int, d varchar(10), e date) distributed by hash(a) properties('replication_num'='1');" sql "insert into test_distinct_multi_null_hash values(1,null,null,null,'2024-12-08');" qt_null_hash "SELECT a, b, count(distinct c,e), count(distinct concat(d,e))/count(distinct e) FROM test_distinct_multi_null_hash where e = '2024-12-08' GROUP BY a, b;" + + // test agg function not support multi_distinct + sql "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi" + qt_array_agg_nogby "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi where a=1 and b=2" + qt_array_agg_gby "select array_agg(distinct b), array_agg(distinct a) from test_distinct_multi where a=1 and b=2 group by c" + qt_array_agg_and_other "select array_agg(distinct b), count(distinct a) from test_distinct_multi where b=2" + qt_not_split_cte_when_same_col "select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi" + qt_not_split_cte_when_same_col_shape "explain shape plan select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi" + order_qt_not_split_cte_when_same_col_gby "select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi group by b" + qt_not_split_cte_when_same_col_gby__shape "explain shape plan select sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi group by b" } diff --git a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy index cf93cad471ca4b..5903b9e474e975 100644 --- a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy +++ b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy @@ -70,10 +70,7 @@ suite("analyze_agg") { tt2.c; """ - test { - sql "select count(distinct t2.b), variance(distinct t2.c) from t2" - exception "variance(DISTINCT c#2) can't support multi distinct." - } + qt_multi_agg_distinct_func "select count(distinct t2.b), variance(distinct t2.c) from t2" // should not bind g /g in group by again, otherwise will throw exception sql "select g / g as nu, sum(c) from t2 group by nu"