From 4676966315bf81b12225d7fd1d6445871defa477 Mon Sep 17 00:00:00 2001 From: seawinde Date: Thu, 6 Nov 2025 09:27:33 +0800 Subject: [PATCH 1/2] [fix](nereids) Fix not in aggregate's output err after eliminate by uniform when group sets exist (#56942) Fix not in aggregate's output err after eliminate by uniform when group sets exist if query as following, would cause `ERROR 1105 (HY000): errCode = 2, detailMessage = GROUPING_PREFIX_event_name_group not in aggregate's output` the pr fix this ```sql SELECT CASE WHEN GROUPING(event_date) = 1 THEN '(TOTAL)' ELSE CAST(event_date AS VARCHAR) END AS event_date, user_id, MAX(conversion_level) AS conversion_level, CASE WHEN GROUPING(event_name_group) = 1 THEN '(TOTAL)' ELSE event_name_group END AS event_name_group FROM ( SELECT src.event_date, src.user_id, WINDOW_FUNNEL( 3600 * 24 * 1, 'default', src.event_time, src.event_name = 'shop_buy', src.event_name = 'shop_buy' ) AS conversion_level, src.event_name_group FROM ( SELECT CAST(etb.`@dt` AS DATE) AS event_date, etb.`@event_name` AS event_name, etb.`@event_time` AS event_time, etb.`@event_name` AS event_name_group, etb.`@user_id` AS user_id FROM `test_event` AS etb WHERE etb.`@dt` between '2025-09-03 02:00:00' AND '2025-09-10 01:59:59' AND etb.`@event_name` = 'shop_buy' AND etb.`@user_id` IS NOT NULL AND etb.`@user_id` > '0' ) AS src GROUP BY src.event_date, src.user_id, src.event_name_group ) AS fwt GROUP BY GROUPING SETS ( (user_id), (user_id, event_date), (user_id, event_name_group), (user_id, event_date, event_name_group) ); ``` ### What problem does this PR solve? Issue Number: close #xxx Related PR: #43391 Problem Summary: ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- .../rules/expression/ExpressionRuleType.java | 1 + .../rewrite/EliminateGroupByKeyByUniform.java | 6 ++ .../nereids/rules/rewrite/ExprIdRewriter.java | 59 +++++++++--- .../expressions/VirtualSlotReference.java | 7 ++ .../trees/plans/logical/LogicalAggregate.java | 6 ++ .../eliminate_group_by_key_by_uniform.groovy | 95 +++++++++++++++++++ 6 files changed, 161 insertions(+), 13 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java index fa29d64888cf35..c5378c379796bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRuleType.java @@ -32,6 +32,7 @@ public enum ExpressionRuleType { DIGITAL_MASKING_CONVERT, DISTINCT_PREDICATES, EXPR_ID_REWRITE_REPLACE, + VIRTUAL_EXPR_ID_REWRITE_REPLACE, EXTRACT_COMMON_FACTOR, FOLD_CONSTANT_ON_BE, FOLD_CONSTANT_ON_FE, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyByUniform.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyByUniform.java index c210c8cca6e622..9928b2d8276819 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyByUniform.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyByUniform.java @@ -32,6 +32,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.util.Utils; @@ -79,6 +80,11 @@ public Plan visit(Plan plan, Map replaceMap) { public Plan visitLogicalAggregate(LogicalAggregate aggregate, Map replaceMap) { aggregate = visitChildren(this, aggregate, replaceMap); aggregate = (LogicalAggregate) exprIdReplacer.rewriteExpr(aggregate, replaceMap); + if (aggregate.getSourceRepeat().isPresent()) { + LogicalRepeat sourceRepeat = (LogicalRepeat) exprIdReplacer.rewriteExpr( + aggregate.getSourceRepeat().get(), replaceMap); + aggregate = aggregate.withSourceRepeat(sourceRepeat); + } if (aggregate.getGroupByExpressions().isEmpty() || aggregate.getSourceRepeat().isPresent()) { return aggregate; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExprIdRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExprIdRewriter.java index 615d3cc6c2ea12..d3c0343a8c3579 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExprIdRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ExprIdRewriter.java @@ -29,12 +29,16 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.VirtualSlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingScalarFunction; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; import org.apache.doris.nereids.trees.plans.Plan; import com.google.common.collect.ImmutableList; import java.util.List; import java.util.Map; +import java.util.Optional; /** replace SlotReference ExprId in logical plans */ public class ExprIdRewriter extends ExpressionRewrite { @@ -75,18 +79,10 @@ public Plan rewriteExpr(Plan plan, Map replaceMap) { * SlotReference:a#0 -> a#3, a#1 -> a#7 * */ public static class ReplaceRule implements ExpressionPatternRuleFactory { - private final Map replaceMap; - - public ReplaceRule(Map replaceMap) { - this.replaceMap = replaceMap; - } - - @Override - public List> buildRules() { - return ImmutableList.of( - matchesType(SlotReference.class).thenApply(ctx -> { - Slot slot = ctx.expr; - + private static final DefaultExpressionRewriter> SLOT_REPLACER = + new DefaultExpressionRewriter>() { + @Override + public Expression visitSlotReference(SlotReference slot, Map replaceMap) { ExprId newId = replaceMap.get(slot.getExprId()); if (newId == null) { return slot; @@ -100,7 +96,44 @@ public List> buildRules() { lastId = newId; } } - }).toRule(ExpressionRuleType.EXPR_ID_REWRITE_REPLACE) + } + }; + private final Map replaceMap; + + public ReplaceRule(Map replaceMap) { + this.replaceMap = replaceMap; + } + + @Override + public List> buildRules() { + return ImmutableList.of( + matchesType(SlotReference.class).thenApply(ctx -> { + Slot slot = ctx.expr; + return slot.accept(SLOT_REPLACER, replaceMap); + }).toRule(ExpressionRuleType.EXPR_ID_REWRITE_REPLACE), + matchesType(VirtualSlotReference.class).thenApply(ctx -> { + VirtualSlotReference virtualSlot = ctx.expr; + return virtualSlot.accept(new DefaultExpressionRewriter>() { + @Override + public Expression visitVirtualReference(VirtualSlotReference virtualSlot, + Map replaceMap) { + Optional originExpression = virtualSlot.getOriginExpression(); + if (!originExpression.isPresent()) { + return virtualSlot; + } + GroupingScalarFunction groupingScalarFunction = originExpression.get(); + GroupingScalarFunction rewrittenFunction = + (GroupingScalarFunction) groupingScalarFunction.accept( + SLOT_REPLACER, replaceMap); + if (!rewrittenFunction.children().equals(groupingScalarFunction.children())) { + return virtualSlot.withOriginExpressionAndComputeLongValueMethod( + Optional.of(rewrittenFunction), + rewrittenFunction::computeVirtualSlotValue); + } + return virtualSlot; + } + }, replaceMap); + }).toRule(ExpressionRuleType.VIRTUAL_EXPR_ID_REWRITE_REPLACE) ); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java index 42be621045977b..bac559f407d0ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/VirtualSlotReference.java @@ -156,6 +156,13 @@ public VirtualSlotReference withExprId(ExprId exprId) { originExpression, computeLongValueMethod); } + public VirtualSlotReference withOriginExpressionAndComputeLongValueMethod( + Optional originExpression, + Function> computeLongValueMethod) { + return new VirtualSlotReference(exprId, name.get(), dataType, nullable, qualifier, + originExpression, computeLongValueMethod); + } + @Override public Slot withIndexInSql(Pair index) { return this; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 3ca6d433228ed7..1e062d603ece7b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ -347,6 +347,12 @@ public LogicalAggregate withNormalized(List normalizedGroupBy, hasPushed, sourceRepeat, Optional.empty(), Optional.empty(), normalizedChild); } + public LogicalAggregate withSourceRepeat(LogicalRepeat sourceRepeat) { + return new LogicalAggregate<>(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, + generated, hasPushed, withInProjection, Optional.ofNullable(sourceRepeat), + Optional.empty(), Optional.empty(), child()); + } + private boolean isUniqueGroupByUnique(NamedExpression namedExpression) { if (namedExpression.children().size() != 1) { return false; diff --git a/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_group_by_key_by_uniform.groovy b/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_group_by_key_by_uniform.groovy index c5652e76d37a00..4b5b63ead8f6d0 100644 --- a/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_group_by_key_by_uniform.groovy +++ b/regression-test/suites/nereids_rules_p0/eliminate_gby_key/eliminate_group_by_key_by_uniform.groovy @@ -237,4 +237,99 @@ suite("eliminate_group_by_key_by_uniform") { qt_to_limit_join_project_shape "explain shape plan select 1 as c1 from test1 t1 inner join (select * from test2 where b=105) t2 on t1.a=t2.a group by c1 order by 1;" qt_to_limit_project_uniform_shape "explain shape plan select 1 as c1 from eli_gbk_by_uniform_t group by c1" qt_to_limit_multi_group_by_shape "explain shape plan select 2 as c1 from eli_gbk_by_uniform_t where a=1 group by c1,a" + + // test when has repeat above agg + // disable CONSTANT_PROPAGATION rules to test eliminate aggregate by uniform + sql """set disable_nereids_rules = 'CONSTANT_PROPAGATION';""" + + + sql """drop table if exists test_event""" + sql """ + CREATE TABLE `test_event` ( + `@dt` DATETIME NOT NULL COMMENT '', + `@event_name` VARCHAR(255) NOT NULL COMMENT '', + `@user_id` VARCHAR(100) NOT NULL COMMENT '', + `@event_time` DATETIME NOT NULL COMMENT '', + `@event_property_1` VARCHAR(255) NULL + ) + ENGINE=OLAP + DUPLICATE KEY(`@dt`, `@event_name`, `@user_id`) + COMMENT '' + PARTITION BY RANGE(`@dt`) + ( + PARTITION p202509 VALUES [('2025-09-01 00:00:00'), ('2025-10-05 00:00:00')) + ) + DISTRIBUTED BY HASH(`@user_id`) BUCKETS 10 + PROPERTIES ( + "replication_num" = "1", + "dynamic_partition.enable" = "true", + "dynamic_partition.time_unit" = "MONTH", + "dynamic_partition.start" = "-2147483648", + "dynamic_partition.end" = "3", + "dynamic_partition.prefix" = "p", + "dynamic_partition.buckets" = "10" + ); + """ + + sql """ + INSERT INTO `test_event` (`@dt`, `@event_name`, `@user_id`, `@event_time`, `@event_property_1`) + VALUES + ('2025-09-03 10:00:00', 'shop_buy', 'user_A', '2025-09-03 10:00:00', 'prop_A1'), + ('2025-09-03 10:01:00', 'shop_buy', 'user_A', '2025-09-03 10:01:00', 'prop_A2'), + ('2025-09-04 15:30:00', 'shop_buy', 'user_A', '2025-09-04 15:30:00', 'prop_A3'), + ('2025-09-05 08:00:00', 'shop_buy', 'user_B', '2025-09-05 08:00:00', 'prop_B1'), + ('2025-09-05 08:05:00', 'shop_buy', 'user_B', '2025-09-05 08:05:00', 'prop_B2'), + ('2025-09-09 23:59:59', 'shop_buy', 'user_C', '2025-09-09 23:59:59', 'prop_C1'), + ('2025-10-01 00:00:00', 'shop_buy', 'user_D', '2025-10-01 00:00:00', 'prop_D1'); + """ + + sql """ + SELECT + CASE WHEN GROUPING(event_date) = 1 THEN '(TOTAL)' ELSE CAST(event_date AS VARCHAR) END AS event_date, + user_id, + MAX(conversion_level) AS conversion_level, + CASE WHEN GROUPING(event_name_group) = 1 THEN '(TOTAL)' ELSE event_name_group END AS event_name_group +FROM + ( + SELECT + src.event_date, + src.user_id, + WINDOW_FUNNEL( + 3600 * 24 * 1, + 'default', + src.event_time, + src.event_name = 'shop_buy', + src.event_name = 'shop_buy' + ) AS conversion_level, + src.event_name_group + FROM + ( + SELECT + CAST(etb.`@dt` AS DATE) AS event_date, + etb.`@event_name` AS event_name, + etb.`@event_time` AS event_time, + etb.`@event_name` AS event_name_group, + etb.`@user_id` AS user_id + FROM + `test_event` AS etb + WHERE + etb.`@dt` between '2025-09-03 02:00:00' AND '2025-09-10 01:59:59' + AND etb.`@event_name` = 'shop_buy' + AND etb.`@user_id` IS NOT NULL + AND etb.`@user_id` > '0' + ) AS src + GROUP BY + src.event_date, + src.user_id, + src.event_name_group + ) AS fwt +GROUP BY + GROUPING SETS ( + (user_id), + (user_id, event_date), + (user_id, event_name_group), + (user_id, event_date, event_name_group) + ); + + """ } \ No newline at end of file From 412d361621b03d2fd6eb0afa20c02f1bdbc55b93 Mon Sep 17 00:00:00 2001 From: wusi Date: Mon, 10 Nov 2025 19:40:43 +0800 Subject: [PATCH 2/2] fix compile fail --- .../doris/nereids/trees/plans/logical/LogicalAggregate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 1e062d603ece7b..91ba406bc746c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ -349,7 +349,7 @@ public LogicalAggregate withNormalized(List normalizedGroupBy, public LogicalAggregate withSourceRepeat(LogicalRepeat sourceRepeat) { return new LogicalAggregate<>(groupByExpressions, outputExpressions, normalized, ordinalIsResolved, - generated, hasPushed, withInProjection, Optional.ofNullable(sourceRepeat), + generated, hasPushed, Optional.ofNullable(sourceRepeat), Optional.empty(), Optional.empty(), child()); }