From ed12bf211102b93660c7cd37eb6b62618d7903c0 Mon Sep 17 00:00:00 2001 From: morrySnow Date: Thu, 18 Dec 2025 10:30:20 +0800 Subject: [PATCH] [fix](rbo) use wrong child's output of set operation (#59105) Related PR: #22203 Problem Summary: This pull request refines the logic in the `EliminateEmptyRelation` rewrite rule, focusing on how set operations (`UNION` and `EXCEPT`) handle empty relations in query plans. The changes improve efficiency and code clarity by processing only when necessary, optimizing builder usage, and simplifying project and aggregate node construction. **Set Operation Handling Improvements:** * Updated the logic for `UNION` nodes to only process and eliminate empty relation children when at least one is present, reducing unnecessary computation. Builders are now initialized with expected sizes for efficiency, and redundant code for handling constant expressions was removed. * Improved handling of `EXCEPT` nodes by checking for empty relations only among the right-hand children, and optimizing builder initialization. The construction of aggregate and project nodes now uses the correct outputs directly, streamlining the process. --- .../rules/rewrite/EliminateEmptyRelation.java | 94 ++++++++----------- .../data/empty_relation/eliminate_empty.out | 17 ++-- .../pull_up_predicate_set_op.out | 4 +- 3 files changed, 50 insertions(+), 65 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateEmptyRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateEmptyRelation.java index e62716b2879010..55e79e4742e744 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateEmptyRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateEmptyRelation.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.trees.UnaryNode; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.ExprId; -import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; @@ -34,12 +33,10 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; -import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import java.util.List; import java.util.Optional; @@ -85,13 +82,20 @@ public List buildRules() { }).toRule(RuleType.ELIMINATE_AGG_ON_EMPTYRELATION), // after BuildAggForUnion rule, union may have more than 2 children. logicalUnion(multi()).then(union -> { - if (union.children().isEmpty()) { - // example: select * from (select 1,2 union select 3, 4) T; - // the children size is 0. (1,2) and (3,4) are stored in union.constantExprsList + boolean needProcess = union.arity() == 1; + for (int i = 0; i < union.arity(); i++) { + if (union.child(i) instanceof EmptyRelation) { + needProcess = true; + break; + } + } + if (!needProcess) { return null; } - ImmutableList.Builder nonEmptyChildrenBuilder = ImmutableList.builder(); - ImmutableList.Builder> nonEmptyOutputsBuilder = ImmutableList.builder(); + ImmutableList.Builder nonEmptyChildrenBuilder + = ImmutableList.builderWithExpectedSize(union.arity()); + ImmutableList.Builder> nonEmptyOutputsBuilder + = ImmutableList.builderWithExpectedSize(union.arity()); for (int i = 0; i < union.arity(); i++) { if (!(union.child(i) instanceof EmptyRelation)) { nonEmptyChildrenBuilder.add(union.child(i)); @@ -99,54 +103,30 @@ public List buildRules() { } } List nonEmptyChildren = nonEmptyChildrenBuilder.build(); + List> nonEmptyOutputs = nonEmptyOutputsBuilder.build(); if (nonEmptyChildren.isEmpty()) { if (union.getConstantExprsList().isEmpty()) { return new LogicalEmptyRelation( ConnectContext.get().getStatementContext().getNextRelationId(), union.getOutput()); - } else if (union.getConstantExprsList().size() == 1) { - List constantExprs = union.getConstantExprsList().get(0); - ImmutableList.Builder newOneRowRelationOutput - = ImmutableList.builderWithExpectedSize(constantExprs.size()); - for (int i = 0; i < constantExprs.size(); i++) { - NamedExpression setOutput = union.getOutput().get(i); - NamedExpression constantExpr = constantExprs.get(i); - Expression realConstantExpr = constantExpr instanceof Alias - ? ((Alias) constantExpr).child() : constantExpr; - Alias oneRowRelationOutput = new Alias( - setOutput.getExprId(), realConstantExpr, setOutput.getName()); - newOneRowRelationOutput.add(oneRowRelationOutput); - } - return new LogicalOneRowRelation( - ConnectContext.get().getStatementContext().getNextRelationId(), - newOneRowRelationOutput.build()); } else { return union.withChildrenAndTheirOutputs(ImmutableList.of(), ImmutableList.of()); } } else if (nonEmptyChildren.size() == 1) { if (union.getConstantExprsList().isEmpty()) { - Plan child = nonEmptyChildren.get(0); + ImmutableList.Builder projectsBuilder = ImmutableList.builder(); List unionOutput = union.getOutput(); - int childIdx = union.children().indexOf(nonEmptyChildren.get(0)); - if (childIdx >= 0) { - List childOutput = union.getRegularChildOutput(childIdx); - List projects = Lists.newArrayList(); - for (int i = 0; i < unionOutput.size(); i++) { - ExprId id = unionOutput.get(i).getExprId(); - Alias alias = new Alias(id, childOutput.get(i), unionOutput.get(i).getName()); - projects.add(alias); - } - - return new LogicalProject<>(projects, child); - } else { - // should not hit here. - return null; + for (int i = 0; i < unionOutput.size(); i++) { + ExprId id = unionOutput.get(i).getExprId(); + Alias alias = new Alias(id, nonEmptyOutputs.get(0).get(i), unionOutput.get(i).getName()); + projectsBuilder.add(alias); } + return new LogicalProject<>(projectsBuilder.build(), nonEmptyChildren.get(0)); } } if (union.children().size() != nonEmptyChildren.size()) { - return union.withChildrenAndTheirOutputs(nonEmptyChildren, nonEmptyOutputsBuilder.build()); + return union.withChildrenAndTheirOutputs(nonEmptyChildren, nonEmptyOutputs); } else { // no empty relation child, do not change union return null; @@ -192,8 +172,20 @@ public List buildRules() { ConnectContext.get().getStatementContext().getNextRelationId(), except.getOutput()); } else { - ImmutableList.Builder nonEmptyChildrenBuilder = ImmutableList.builder(); - ImmutableList.Builder> nonEmptyOutputsBuilder = ImmutableList.builder(); + boolean needProcess = false; + for (int i = 1; i < except.arity(); i++) { + if (except.child(i) instanceof EmptyRelation) { + needProcess = true; + break; + } + } + if (!needProcess) { + return null; + } + ImmutableList.Builder nonEmptyChildrenBuilder + = ImmutableList.builderWithExpectedSize(except.arity()); + ImmutableList.Builder> nonEmptyOutputsBuilder + = ImmutableList.builderWithExpectedSize(except.arity()); for (int i = 0; i < except.arity(); i++) { if (!(except.child(i) instanceof EmptyRelation)) { nonEmptyChildrenBuilder.add(except.child(i)); @@ -201,31 +193,27 @@ public List buildRules() { } } List nonEmptyChildren = nonEmptyChildrenBuilder.build(); + List> nonEmptyOutputs = nonEmptyOutputsBuilder.build(); if (nonEmptyChildren.size() == 1) { // the first child is not empty, others are all empty // case 1. FIRST except(distinct) empty = > project(AGG(FIRST)) // case 2. FIRST except(all) empty = > project(FIRST) Plan projectChild; if (except.getQualifier() == SetOperation.Qualifier.DISTINCT) { - List firstOutputNamedExpressions = first.getOutput() - .stream().map(slot -> (NamedExpression) slot) - .collect(ImmutableList.toImmutableList()); - projectChild = new LogicalAggregate<>(ImmutableList.copyOf(firstOutputNamedExpressions), - firstOutputNamedExpressions, true, Optional.empty(), first); + projectChild = new LogicalAggregate<>((List) nonEmptyOutputs.get(0), + (List) nonEmptyOutputs.get(0), true, Optional.empty(), first); } else { projectChild = first; } List exceptOutput = except.getOutput(); - List projectInputSlots = projectChild.getOutput(); - List projects = Lists.newArrayList(); + List projectInputSlots = nonEmptyOutputs.get(0); + ImmutableList.Builder projectsBuilder = ImmutableList.builder(); for (int i = 0; i < exceptOutput.size(); i++) { ExprId id = exceptOutput.get(i).getExprId(); Alias alias = new Alias(id, projectInputSlots.get(i), exceptOutput.get(i).getName()); - projects.add(alias); + projectsBuilder.add(alias); } - return new LogicalProject<>(projects, projectChild); - } else if (nonEmptyChildren.size() == except.children().size()) { - return null; + return new LogicalProject<>(projectsBuilder.build(), projectChild); } else { return except.withChildrenAndTheirOutputs(nonEmptyChildren, nonEmptyOutputsBuilder.build()); } diff --git a/regression-test/data/empty_relation/eliminate_empty.out b/regression-test/data/empty_relation/eliminate_empty.out index 804cbb486bd845..5379d0775a6bf1 100644 --- a/regression-test/data/empty_relation/eliminate_empty.out +++ b/regression-test/data/empty_relation/eliminate_empty.out @@ -142,22 +142,19 @@ PhysicalResultSink -- !union_to_onerow_1_shape -- PhysicalResultSink ---PhysicalDistribute[DistributionSpecGather] -----hashAgg[GLOBAL] -------PhysicalDistribute[DistributionSpecHash] ---------hashAgg[LOCAL] -----------PhysicalOneRowRelation +--PhysicalLimit[GLOBAL] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalProject +--------PhysicalUnion -- !union_to_onerow_2 -- 1 2 -- !union_to_onerow_2_shape -- PhysicalResultSink ---PhysicalDistribute[DistributionSpecGather] -----hashAgg[GLOBAL] -------PhysicalDistribute[DistributionSpecHash] ---------hashAgg[LOCAL] -----------PhysicalOneRowRelation +--PhysicalLimit[GLOBAL] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalUnion -- !prune_partition1 -- PhysicalResultSink diff --git a/regression-test/data/nereids_rules_p0/infer_predicate/pull_up_predicate_set_op.out b/regression-test/data/nereids_rules_p0/infer_predicate/pull_up_predicate_set_op.out index a7ac6765f624d2..24759dc85762bf 100644 --- a/regression-test/data/nereids_rules_p0/infer_predicate/pull_up_predicate_set_op.out +++ b/regression-test/data/nereids_rules_p0/infer_predicate/pull_up_predicate_set_op.out @@ -139,8 +139,8 @@ PhysicalResultSink -- !union_all_const_empty_relation -- PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.a = expr_cast(a as INT)) and (t3.b = t.b)) otherCondition=() -----PhysicalOneRowRelation +--NestedLoopJoin[INNER_JOIN] +----PhysicalUnion ----filter((t3.a = 2) and (t3.b = 'dd')) ------PhysicalOlapScan[test_pull_up_predicate_set_op3]