Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.doris.nereids.rules.rewrite.CountDistinctRewrite;
import org.apache.doris.nereids.rules.rewrite.EliminateAggregate;
import org.apache.doris.nereids.rules.rewrite.EliminateDedupJoinCondition;
import org.apache.doris.nereids.rules.rewrite.EliminateEmptyRelation;
import org.apache.doris.nereids.rules.rewrite.EliminateFilter;
import org.apache.doris.nereids.rules.rewrite.EliminateGroupByConstant;
import org.apache.doris.nereids.rules.rewrite.EliminateLimit;
Expand Down Expand Up @@ -227,7 +228,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
bottomUp(RuleSet.PUSH_DOWN_FILTERS),
// after eliminate outer join, we can move some filters to join.otherJoinConjuncts,
// this can help to translate plan to backend
topDown(new PushFilterInsideJoin())
topDown(new PushFilterInsideJoin()),
topDown(new ExpressionNormalization())
),

custom(RuleType.CHECK_DATA_TYPES, CheckDataTypes::new),
Expand Down Expand Up @@ -303,6 +305,10 @@ public class Rewriter extends AbstractBatchJobExecutor {
new CollectFilterAboveConsumer(),
new CollectProjectAboveConsumer()
)
),

topic("eliminate empty relation",
bottomUp(new EliminateEmptyRelation())
)
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ public enum RuleType {
ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE),
ELIMINATE_HINT(RuleTypeClass.REWRITE),
ELIMINATE_JOIN_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_FILTER_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_AGG_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_UNION_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_INTERSECTION_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
ELIMINATE_EXCEPT_ON_EMPTYRELATION(RuleTypeClass.REWRITE),
INFER_PREDICATES(RuleTypeClass.REWRITE),
INFER_AGG_NOT_NULL(RuleTypeClass.REWRITE),
INFER_SET_OPERATOR_DISTINCT(RuleTypeClass.REWRITE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ private void checkAllSlotReferenceFromChildren(Plan plan) {
notFromChildren = removeValidSlotsNotFromChildren(notFromChildren, childrenOutput);
if (!notFromChildren.isEmpty()) {
throw new AnalysisException(String.format("Input slot(s) not in child's output: %s in plan: %s,"
+ " child output is: %s",
+ " child output is: %s\n" + "plan tree:\n" + plan.treeString(),
StringUtils.join(notFromChildren.stream()
.map(ExpressionTrait::toString)
.collect(Collectors.toSet()), ", "), plan,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.algebra.EmptyRelation;
import org.apache.doris.nereids.trees.plans.algebra.SetOperation;
import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.qe.ConnectContext;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

/**
* try to eliminate sub plan tree which contains EmptyRelation
*/
public class EliminateEmptyRelation implements RewriteRuleFactory {

@Override
public List<Rule> buildRules() {
return ImmutableList.of(
// join->empty
logicalJoin(any(), any())
.when(this::hasEmptyRelationChild)
.then(join -> {
if (canReplaceJoinByEmptyRelation(join)) {
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
join.getOutput());
}
return join;
})
.toRule(RuleType.ELIMINATE_JOIN_ON_EMPTYRELATION),
logicalFilter(logicalEmptyRelation())
.then(filter -> new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
filter.getOutput())
).toRule(RuleType.ELIMINATE_FILTER_ON_EMPTYRELATION),
logicalAggregate(logicalEmptyRelation())
.when(agg -> !agg.getGroupByExpressions().isEmpty())
.then(agg -> new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
agg.getOutput())
).toRule(RuleType.ELIMINATE_AGG_ON_EMPTYRELATION),

// after BuildAggForUnion rule, union may have more than 2 children.
logicalUnion(multi()).then(union -> {
if (union.children().size() == 0) {
// example: select * from (select 1,2 union select 3, 4) T;
// the children size is 0. (1,2) and (3,4) are stored in union.constantExprsList
return null;
}
List<Plan> nonEmptyChildren = union.children().stream()
.filter(child -> !(child instanceof EmptyRelation))
.collect(Collectors.toList());
if (nonEmptyChildren.isEmpty()) {
if (union.getConstantExprsList().isEmpty()) {
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
union.getOutput());
} else {
return union.withChildren(ImmutableList.of());
}
} else if (nonEmptyChildren.size() == 1) {
if (union.getConstantExprsList().isEmpty()) {
Plan child = nonEmptyChildren.get(0);
List<Slot> unionOutput = union.getOutput();
List<Slot> childOutput = child.getOutput();
List<NamedExpression> projects = Lists.newArrayList();
for (int i = 0; i < unionOutput.size(); i++) {
ExprId id = unionOutput.get(i).getExprId();
Alias alias = new Alias(id, childOutput.get(i), unionOutput.get(i).getName());
projects.add(alias);
}

LogicalProject project = new LogicalProject<>(projects, child);
return project;
}
}

if (union.children().size() != nonEmptyChildren.size()) {
return union.withChildren(ImmutableList.copyOf(nonEmptyChildren));
} else {
// no empty relation child, do not change union
return null;
}
}).toRule(RuleType.ELIMINATE_UNION_ON_EMPTYRELATION),
// set intersect
logicalIntersect(multi()).then(intersect -> {
List<Plan> emptyChildren = intersect.children().stream()
.filter(EmptyRelation.class::isInstance)
.collect(Collectors.toList());
if (emptyChildren.isEmpty()) {
// no empty relation child, plan not changed
return null;
} else {
// there is empty relation child, the intersection result is empty.
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
intersect.getOutput());
}
}).toRule(RuleType.ELIMINATE_INTERSECTION_ON_EMPTYRELATION),
// set except
logicalExcept(multi()).then(except -> {
Plan first = except.child(0);
if (first instanceof EmptyRelation) {
// empty except any => empty
return new LogicalEmptyRelation(
ConnectContext.get().getStatementContext().getNextRelationId(),
except.getOutput());
} else {
List<Plan> nonEmptyChildren = except.children().stream()
.filter(child -> !(child instanceof EmptyRelation))
.collect(Collectors.toList());
if (nonEmptyChildren.size() == 1) {
// the first child is not empty, others are all empty
// case 1. FIRST except(distinct) empty = > project(AGG(FIRST))
// case 2. FIRST except(all) empty = > project(FIRST)
Plan projectChild;
if (except.getQualifier() == SetOperation.Qualifier.DISTINCT) {
List<NamedExpression> firstOutputNamedExpressions = first.getOutput()
.stream().map(slot -> (NamedExpression) slot)
.collect(ImmutableList.toImmutableList());
projectChild = new LogicalAggregate<>(ImmutableList.copyOf(firstOutputNamedExpressions),
firstOutputNamedExpressions, true, Optional.empty(), first);
} else {
projectChild = first;
}

List<Slot> exceptOutput = except.getOutput();
List<Slot> projectInputSlots = projectChild.getOutput();

List<NamedExpression> projects = Lists.newArrayList();
for (int i = 0; i < exceptOutput.size(); i++) {
ExprId id = exceptOutput.get(i).getExprId();
Alias alias = new Alias(id, projectInputSlots.get(i), exceptOutput.get(i).getName());
projects.add(alias);
}
LogicalProject project = new LogicalProject(projects, projectChild);
return project;
} else if (nonEmptyChildren.size() == except.children().size()) {
return null;
} else {
return except.withChildren(nonEmptyChildren);
}
}
}).toRule(RuleType.ELIMINATE_EXCEPT_ON_EMPTYRELATION)
);
}

private boolean hasEmptyRelationChild(LogicalJoin join) {
return join.left() instanceof EmptyRelation || join.right() instanceof EmptyRelation;
}

private boolean canReplaceJoinByEmptyRelation(LogicalJoin join) {
return (join.getJoinType() == JoinType.INNER_JOIN
|| join.getJoinType() == JoinType.LEFT_SEMI_JOIN
|| join.getJoinType() == JoinType.RIGHT_SEMI_JOIN
|| join.getJoinType() == JoinType.CROSS_JOIN)
|| (join.getJoinType() == JoinType.LEFT_OUTER_JOIN && join.left() instanceof EmptyRelation)
|| (join.getJoinType() == JoinType.RIGHT_OUTER_JOIN && join.right() instanceof EmptyRelation);
}

}
72 changes: 72 additions & 0 deletions regression-test/data/empty_relation/eliminate_empty.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !onerow_union --
1 2
3 4

-- !join --
PhysicalResultSink
--PhysicalEmptyRelation

-- !explain_union_empty_data --
PhysicalResultSink
--PhysicalDistribute
----hashAgg[LOCAL]
------PhysicalProject
--------PhysicalOlapScan[nation]

-- !union_empty_data --
1

-- !explain_union_empty_empty --
PhysicalResultSink
--PhysicalEmptyRelation

-- !union_empty_empty --

-- !union_emtpy_onerow --
10

-- !explain_intersect_data_empty --
PhysicalResultSink
--PhysicalEmptyRelation

-- !explain_intersect_empty_data --
PhysicalResultSink
--PhysicalEmptyRelation

-- !explain_except_data_empty --
PhysicalResultSink
--PhysicalDistribute
----PhysicalProject
------hashAgg[LOCAL]
--------PhysicalProject
----------PhysicalOlapScan[nation]

-- !explain_except_data_empty_data --
PhysicalResultSink
--PhysicalDistribute
----PhysicalExcept
------PhysicalDistribute
--------PhysicalProject
----------PhysicalOlapScan[nation]
------PhysicalDistribute
--------PhysicalProject
----------filter(( not (n_nationkey = 1)))
------------PhysicalOlapScan[nation]

-- !except_data_empty_data --
1

-- !explain_except_empty_data --
PhysicalResultSink
--PhysicalEmptyRelation

-- !intersect_data_empty --

-- !intersect_empty_data --

-- !except_data_empty --
1

-- !except_empty_data --

Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------------PhysicalProject
------------------------filter('s' IN ('s', 'w'))
--------------------------PhysicalOlapScan[store_sales]
------------------------PhysicalOlapScan[store_sales]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter('s' IN ('s', 'w')d_year IN (2001, 2002))
--------------------------filter(d_year IN (2001, 2002))
----------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter('s' IN ('s', 'w'))
------------------------PhysicalOlapScan[customer]
----------------------PhysicalOlapScan[customer]
------PhysicalProject
--------hashAgg[GLOBAL]
----------PhysicalDistribute
Expand All @@ -31,16 +29,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](web_sales.ws_sold_date_sk = date_dim.d_date_sk)
----------------------PhysicalProject
------------------------filter('w' IN ('s', 'w'))
--------------------------PhysicalOlapScan[web_sales]
------------------------PhysicalOlapScan[web_sales]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter(d_year IN (2001, 2002)'w' IN ('s', 'w'))
--------------------------filter(d_year IN (2001, 2002))
----------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter('w' IN ('s', 'w'))
------------------------PhysicalOlapScan[customer]
----------------------PhysicalOlapScan[customer]
--PhysicalResultSink
----PhysicalTopN
------PhysicalDistribute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--PhysicalCteAnchor ( cteId=CTEId#2 )
----PhysicalCteProducer ( cteId=CTEId#2 )
------PhysicalProject
--------NestedLoopJoin[INNER_JOIN](cast(ssales as DOUBLE) > cast(((cast(95 as DECIMALV3(8, 5)) / 100.0) * tpcds_cmax) as DOUBLE))
--------NestedLoopJoin[INNER_JOIN](cast(ssales as DOUBLE) > cast((0.9500 * tpcds_cmax) as DOUBLE))
----------hashAgg[LOCAL]
------------PhysicalProject
--------------hashJoin[INNER_JOIN](store_sales.ss_customer_sk = customer.c_customer_sk)
Expand Down
Loading