Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow boolean Expr simplification even when nullable #12746

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 13 additions & 27 deletions datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -838,22 +838,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
op: Or,
right,
}) if expr_contains(&right, &left, Or) => Transformed::yes(*right),
// A OR (A AND B) --> A (if B not null)
// A OR (A AND B) --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&right)? && is_op_with(And, &right, &left) => {
Transformed::yes(*left)
}
// (A AND B) OR A --> A (if B not null)
}) if is_op_with(And, &right, &left) => Transformed::yes(*left),
// (A AND B) OR A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: Or,
right,
}) if !info.nullable(&left)? && is_op_with(And, &left, &right) => {
Transformed::yes(*right)
}
}) if is_op_with(And, &left, &right) => Transformed::yes(*right),

//
// Rules for AND
Expand Down Expand Up @@ -911,22 +907,18 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> {
op: And,
right,
}) if expr_contains(&right, &left, And) => Transformed::yes(*right),
// A AND (A OR B) --> A (if B not null)
// A AND (A OR B) --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => {
Transformed::yes(*left)
}
// (A OR B) AND A --> A (if B not null)
}) if is_op_with(Or, &right, &left) => Transformed::yes(*left),
// (A OR B) AND A --> A
Expr::BinaryExpr(BinaryExpr {
left,
op: And,
right,
}) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => {
Transformed::yes(*right)
}
}) if is_op_with(Or, &left, &right) => Transformed::yes(*right),

//
// Rules for Multiply
Expand Down Expand Up @@ -2609,15 +2601,11 @@ mod tests {
// (c2 > 5) OR ((c1 < 6) AND (c2 > 5))
let expr = or(l.clone(), r.clone());

// no rewrites if c1 can be null
let expected = expr.clone();
let expected = l.clone();
assert_eq!(simplify(expr), expected);

// ((c1 < 6) AND (c2 > 5)) OR (c2 > 5)
let expr = or(l, r);

// no rewrites if c1 can be null
let expected = expr.clone();
let expr = or(r, l);
assert_eq!(simplify(expr), expected);
}

Expand Down Expand Up @@ -2648,13 +2636,11 @@ mod tests {
// (c2 > 5) AND ((c1 < 6) OR (c2 > 5)) --> c2 > 5
let expr = and(l.clone(), r.clone());

// no rewrites if c1 can be null
let expected = expr.clone();
let expected = l.clone();
assert_eq!(simplify(expr), expected);

// ((c1 < 6) OR (c2 > 5)) AND (c2 > 5) --> c2 > 5
let expr = and(l, r);
let expected = expr.clone();
let expr = and(r, l);
assert_eq!(simplify(expr), expected);
}

Expand Down Expand Up @@ -3223,7 +3209,7 @@ mod tests {
)],
Some(Box::new(col("c2").eq(lit(true)))),
)))),
col("c2").or(col("c2").not().and(col("c2"))) // #1716
col("c2")
);

// CASE WHEN ISNULL(c2) THEN true ELSE c2
Expand Down
16 changes: 8 additions & 8 deletions datafusion/sqllogictest/test_files/cse.slt
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,20 @@ physical_plan
# Surely only once but also conditionally evaluated expressions
query TT
EXPLAIN SELECT
(a = 1 OR random() = 0) AND a = 1 AS c1,
(a = 2 AND random() = 0) OR a = 2 AS c2,
(a = 1 OR random() = 0) AND a = 2 AS c1,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes are neccessary to avoid us simplifiying these conditions and therefore changing what is gets tested here.

(a = 2 AND random() = 0) OR a = 1 AS c2,
CASE WHEN a + 3 = 0 THEN a + 3 ELSE 0 END AS c3,
CASE WHEN a + 4 = 0 THEN 0 WHEN a + 4 THEN 0 ELSE 0 END AS c4,
CASE WHEN a + 5 = 0 THEN 0 WHEN random() = 0 THEN a + 5 ELSE 0 END AS c5,
CASE WHEN a + 6 = 0 THEN 0 ELSE a + 6 END AS c6
FROM t1
----
logical_plan
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_1 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_2 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6
03)----TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_1@0 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_2@1 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6]
03)----MemoryExec: partitions=1, partition_sizes=[0]

Expand All @@ -217,17 +217,17 @@ physical_plan
# Only conditionally evaluated expressions
query TT
EXPLAIN SELECT
(random() = 0 OR a = 1) AND a = 1 AS c1,
(random() = 0 AND a = 2) OR a = 2 AS c2,
(random() = 0 OR a = 1) AND a = 2 AS c1,
(random() = 0 AND a = 2) OR a = 1 AS c2,
CASE WHEN random() = 0 THEN a + 3 ELSE a + 3 END AS c3,
CASE WHEN random() = 0 THEN 0 WHEN a + 4 = 0 THEN a + 4 ELSE 0 END AS c4,
CASE WHEN random() = 0 THEN 0 WHEN a + 5 = 0 THEN 0 ELSE a + 5 END AS c5,
CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a + 6 ELSE a + 6 END AS c6
FROM t1
----
logical_plan
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(1) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(2) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
02)--TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 1 as c1, random() = 0 AND a@0 = 2 OR a@0 = 2 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
02)--MemoryExec: partitions=1, partition_sizes=[0]