Skip to content

Commit

Permalink
[refactor](Nereids) New expression extractor for partitions pruning (a…
Browse files Browse the repository at this point in the history
…pache#36326)

An exception throw in TryEliminateUninterestedPredicates, for this case

 CREATE TABLE `tbltest` (
  `id` INT NULL,
  `col2` VARCHAR(255) NULL,
  `col3` VARCHAR(255) NULL,
  `dt` DATE NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`, `col2`)
PARTITION BY RANGE(`dt`)
(PARTITION p20240617 VALUES [('2024-06-17'), ('2024-06-18')))
DISTRIBUTED BY HASH(`id`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);

select * from tbltest
where
  case
    when col2 = 'xxx' and col3='yyy' then false -- note this is not about partition column
    when col2 in ('xxx') then false
    when col2 like 'xxx%' then false
    else true
  end

The CaseWhen require children should be WhenClause,
TryEliminateUninterestedPredicates maybe rewrite the WhenClause to
true/false predicate, and cause this exception:

ERROR 1105 (HY000): errCode = 2, detailMessage = The children format needs to be [WhenClause+, DefaultValue?]

Original extractor(TryEliminateUninterestedPredicates.java) caused some
errors while try to derive the expressions which can be used for pruning
partitions.
I tried to write a new extractor(and with unit tests) for pruning
partitions, it is more simple and reliable (I think).

The theory of extractor is pretty simple:
A:Sort the expression in two kinds:
  1. evaluable-expression (let's mark it as E).
    Expressions that can be evaluated in the partition pruning stage.
    In the other word: not contains non-partition slots or deterministic
    expression.
  2. un-evaluable-expression (let's mark it as UE).
    Expressions that can NOT be evaluated in the partition pruning stage.
    In the other word: contains non-partition slots or deterministic
    expression.

B: Travel the predicate, only point on AND and OR operator, following the rule:
  (E and UE) -> (E and TRUE) -> E
  (UE and UE) -> TRUE
  (E and E) -> (E and E)
  (E or UE) -> TRUE
  (UE or UE) -> TRUE
  (E or E) -> (E or E)
  • Loading branch information
XuPengfei-1020 authored and dataroaring committed Jun 21, 2024
1 parent b243926 commit f4a732e
Show file tree
Hide file tree
Showing 7 changed files with 475 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
*/
public class UnboundFunction extends Function implements Unbound, PropagateNullable {
private final String dbName;
private final String name;
private final boolean isDistinct;

public UnboundFunction(String name, List<Expression> arguments) {
Expand All @@ -52,16 +51,11 @@ public UnboundFunction(String name, boolean isDistinct, List<Expression> argumen
}

public UnboundFunction(String dbName, String name, boolean isDistinct, List<Expression> arguments) {
super(arguments);
super(name, arguments);
this.dbName = dbName;
this.name = Objects.requireNonNull(name, "name cannot be null");
this.isDistinct = isDistinct;
}

public String getName() {
return name;
}

@Override
public String getExpressionName() {
if (!this.exprName.isPresent()) {
Expand All @@ -87,13 +81,13 @@ public String toSql() throws UnboundException {
String params = children.stream()
.map(Expression::toSql)
.collect(Collectors.joining(", "));
return name + "(" + (isDistinct ? "distinct " : "") + params + ")";
return getName() + "(" + (isDistinct ? "distinct " : "") + params + ")";
}

@Override
public String toString() {
String params = Joiner.on(", ").join(children);
return "'" + name + "(" + (isDistinct ? "distinct " : "") + params + ")";
return "'" + getName() + "(" + (isDistinct ? "distinct " : "") + params + ")";
}

@Override
Expand All @@ -103,7 +97,7 @@ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {

@Override
public UnboundFunction withChildren(List<Expression> children) {
return new UnboundFunction(dbName, name, isDistinct, children);
return new UnboundFunction(dbName, getName(), isDistinct, children);
}

@Override
Expand All @@ -118,11 +112,11 @@ public boolean equals(Object o) {
return false;
}
UnboundFunction that = (UnboundFunction) o;
return isDistinct == that.isDistinct && name.equals(that.name);
return isDistinct == that.isDistinct && getName().equals(that.getName());
}

@Override
public int hashCode() {
return Objects.hash(name, isDistinct);
return Objects.hash(getName(), isDistinct);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.expression.rules;

import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.rules.PartitionPruneExpressionExtractor.Context;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Or;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SubqueryExpr;
import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;

import com.google.common.annotations.VisibleForTesting;

import java.util.Objects;
import java.util.Set;

/**
* PartitionPruneExpressionExtractor
*
* This rewriter only used to extract the expression that can be used in partition pruning from
* the whole predicate expression.
* The theory of extractor is pretty simple:
* A:Sort the expression in two kinds:
* 1. evaluable-expression (let's mark it as E).
* Expressions that can be evaluated in the partition pruning stage.
* In the other word: not contains non-partition slots or deterministic expression.
* 2. un-evaluable-expression (let's mark it as UE).
* Expressions that can NOT be evaluated in the partition pruning stage.
* In the other word: contains non-partition slots or deterministic expression.
*
* B: Travel the predicate, only point on AND and OR operator, following the rule:
* (E and UE) -> (E and TRUE) -> E
* (UE and UE) -> TRUE
* (E and E) -> (E and E)
* (E or UE) -> TRUE
* (UE or UE) -> TRUE
* (E or E) -> (E or E)
*
* e.g.
* (part = 1 and non_part = 'a') or (part = 2)
* -> (part = 1 and true) or (part = 2)
* -> (part = 1) or (part = 2)
*
* It's better that do some expression optimize(like fold, eliminate etc.) on predicate before this step.
*/
public class PartitionPruneExpressionExtractor extends DefaultExpressionRewriter<Context> {
private final ExpressionEvaluableDetector expressionEvaluableDetector;

private PartitionPruneExpressionExtractor(Set<Slot> interestedSlots) {
this.expressionEvaluableDetector = new ExpressionEvaluableDetector(interestedSlots);
}

/**
* Extract partition prune expression from predicate
*/
public static Expression extract(Expression predicate,
Set<Slot> partitionSlots,
CascadesContext cascadesContext) {
predicate = predicate.accept(FoldConstantRuleOnFE.VISITOR_INSTANCE,
new ExpressionRewriteContext(cascadesContext));
PartitionPruneExpressionExtractor rewriter = new PartitionPruneExpressionExtractor(partitionSlots);
Context context = new Context();
Expression partitionPruneExpression = predicate.accept(rewriter, context);
if (context.containsUnEvaluableExpression) {
return BooleanLiteral.TRUE;
}
return partitionPruneExpression;
}

@Override
public Expression visit(Expression originExpr, Context parentContext) {
if (originExpr instanceof And) {
return this.visitAnd((And) originExpr, parentContext);
}
if (originExpr instanceof Or) {
return this.visitOr((Or) originExpr, parentContext);
}

parentContext.containsUnEvaluableExpression = !expressionEvaluableDetector.detect(originExpr);
return originExpr;
}

@Override
public Expression visitAnd(And node, Context parentContext) {
// handle left node
Context leftContext = new Context();
Expression newLeft = node.left().accept(this, leftContext);
// handle right node
Context rightContext = new Context();
Expression newRight = node.right().accept(this, rightContext);

// if anyone of them is FALSE, the whole expression should be FALSE.
if (newLeft == BooleanLiteral.FALSE || newRight == BooleanLiteral.FALSE) {
return BooleanLiteral.FALSE;
}

// If left node contains non-partition slot or is TURE, just discard it.
if (newLeft == BooleanLiteral.TRUE || leftContext.containsUnEvaluableExpression) {
return rightContext.containsUnEvaluableExpression ? BooleanLiteral.TRUE : newRight;
}

// If right node contains non-partition slot or is TURE, just discard it.
if (newRight == BooleanLiteral.TRUE || rightContext.containsUnEvaluableExpression) {
return newLeft;
}

// both does not contains non-partition slot.
return new And(newLeft, newRight);
}

@Override
public Expression visitOr(Or node, Context parentContext) {
// handle left node
Context leftContext = new Context();
Expression newLeft = node.left().accept(this, leftContext);
// handle right node
Context rightContext = new Context();
Expression newRight = node.right().accept(this, rightContext);

// if anyone of them is TRUE or contains non-partition slot, just return TRUE.
if (newLeft == BooleanLiteral.TRUE || newRight == BooleanLiteral.TRUE
|| leftContext.containsUnEvaluableExpression || rightContext.containsUnEvaluableExpression) {
return BooleanLiteral.TRUE;
}

return new Or(newLeft, newRight);
}

/**
* Context
*/
@VisibleForTesting
public static class Context {
private boolean containsUnEvaluableExpression;
}

/**
* The detector only indicate that whether a predicate contains interested slots or not,
* and do not change the predicate.
*/
@VisibleForTesting
public static class ExpressionEvaluableDetector extends DefaultExpressionRewriter<Context> {
private final Set<Slot> partitionSlots;

public ExpressionEvaluableDetector(Set<Slot> partitionSlots) {
this.partitionSlots = Objects.requireNonNull(partitionSlots, "partitionSlots can not be null");
}

/**
* Return true if expression does NOT contains un-evaluable expression.
*/
@VisibleForTesting
public boolean detect(Expression expression) {
boolean containsUnEvaluableExpression = expression.anyMatch(
expr -> expr instanceof SubqueryExpr || (expr instanceof Slot && !partitionSlots.contains(expr)));
return !containsUnEvaluableExpression;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public List<Long> prune() {
public static List<Long> prune(List<Slot> partitionSlots, Expression partitionPredicate,
Map<Long, PartitionItem> idToPartitions, CascadesContext cascadesContext,
PartitionTableType partitionTableType) {
partitionPredicate = TryEliminateUninterestedPredicates.rewrite(
partitionPredicate = PartitionPruneExpressionExtractor.extract(
partitionPredicate, ImmutableSet.copyOf(partitionSlots), cascadesContext);
partitionPredicate = PredicateRewriteForPartitionPrune.rewrite(partitionPredicate, cascadesContext);

Expand Down
Loading

0 comments on commit f4a732e

Please sign in to comment.