From f9e87f07d7bc8020d0f1409019dafe01487f41ad Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 27 Aug 2020 18:43:27 -0700 Subject: [PATCH 1/7] SPARK-32721 --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/optimizer/expressions.scala | 11 +++++ .../catalyst/optimizer/SimplifyIfSuite.scala | 49 +++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index bcdc5cd942e3..7ec6a9a8e30c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,6 +103,7 @@ abstract class Optimizer(catalogManager: CatalogManager) ReplaceNullWithFalseInPredicate, PruneFilters, SimplifyCasts, + SimplifyIf, SimplifyCaseConversionExpressions, RewriteCorrelatedScalarSubquery, EliminateSerialization, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 773ee7708aea..fe0aa45557dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -716,6 +716,17 @@ object SimplifyCasts extends Rule[LogicalPlan] { } } +/** + * Simplify if clauses of pattern `if(p, null, true|false)`, by replacing them with + * AND or OR clauses which are simpler and can better be pushed down + */ +object SimplifyIf extends Rule[LogicalPlan] { + val nullLiteral = Literal(null, BooleanType) + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + case If(p, Literal(null, _), FalseLiteral) => And(p, nullLiteral) + case If(p, Literal(null, _), TrueLiteral) => Or(p, nullLiteral) + } +} /** * Removes nodes that are not necessary. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala new file mode 100644 index 000000000000..0531998f80b0 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.BooleanType + +class SimplifyIfSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("SimplifyIf", FixedPoint(1), SimplifyIf) :: Nil + } + + val testRelation = LocalRelation('a.int) + + test("simplify if when null is on the then branch") { + val nullLiteral = Literal(null, BooleanType) + assertEquivalent(If('a > 42, nullLiteral, false), And('a > 42, nullLiteral)) + assertEquivalent(If('a > 42, nullLiteral, true), Or('a > 42, nullLiteral)) + } + + private def assertEquivalent(e1: Expression, e2: Expression) = { + val plan = testRelation.where(e1).analyze + val actual = Optimize.execute(plan) + val expected = testRelation.where(e2).analyze + comparePlans(actual, expected) + } + +} From e4d929cae26b0e180ba31830fa32949e1dc44b60 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 28 Aug 2020 10:51:08 -0700 Subject: [PATCH 2/7] Fix bugs and merge into SimplifyConditionals --- .../sql/catalyst/optimizer/Optimizer.scala | 1 - .../sql/catalyst/optimizer/expressions.scala | 15 +----- .../optimizer/SimplifyConditionalSuite.scala | 21 +++++++- .../catalyst/optimizer/SimplifyIfSuite.scala | 49 ------------------- 4 files changed, 22 insertions(+), 64 deletions(-) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 7ec6a9a8e30c..bcdc5cd942e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,7 +103,6 @@ abstract class Optimizer(catalogManager: CatalogManager) ReplaceNullWithFalseInPredicate, PruneFilters, SimplifyCasts, - SimplifyIf, SimplifyCaseConversionExpressions, RewriteCorrelatedScalarSubquery, EliminateSerialization, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index fe0aa45557dd..322619c7c1db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} - import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} @@ -463,6 +462,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { case If(Literal(null, _), _, falseValue) => falseValue case If(cond, trueValue, falseValue) if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue + case If(p, l @ Literal(null, _), FalseLiteral) if !p.nullable => And(p, l) + case If(p, l @ Literal(null, _), TrueLiteral) if !p.nullable => Or(Not(p), l) case e @ CaseWhen(branches, elseValue) if branches.exists(x => falseOrNullLiteral(x._1)) => // If there are branches that are always false, remove them. @@ -716,18 +717,6 @@ object SimplifyCasts extends Rule[LogicalPlan] { } } -/** - * Simplify if clauses of pattern `if(p, null, true|false)`, by replacing them with - * AND or OR clauses which are simpler and can better be pushed down - */ -object SimplifyIf extends Rule[LogicalPlan] { - val nullLiteral = Literal(null, BooleanType) - def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case If(p, Literal(null, _), FalseLiteral) => And(p, nullLiteral) - case If(p, Literal(null, _), TrueLiteral) => Or(p, nullLiteral) - } -} - /** * Removes nodes that are not necessary. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 7b4ef54c627e..57195bda794d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.types.{BooleanType, IntegerType} -class SimplifyConditionalSuite extends PlanTest with PredicateHelper { +class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with PredicateHelper { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("SimplifyConditionals", FixedPoint(50), @@ -165,4 +165,23 @@ class SimplifyConditionalSuite extends PlanTest with PredicateHelper { Literal(1)) ) } + + test("simplify if when then clause is null and else clause is boolean") { + val p = IsNull('a) + val nullLiteral = Literal(null, BooleanType) + assertEquivalent(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral)) + assertEquivalent(If(p, nullLiteral, TrueLiteral), Or(IsNotNull('a), nullLiteral)) + + // the rule should not apply to nullable predicate + Seq(TrueLiteral, FalseLiteral).foreach { b => + assertEquivalent(If(GreaterThan('a, 42), nullLiteral, b), + If(GreaterThan('a, 42), nullLiteral, b)) + } + + // check evaluation also + Seq(TrueLiteral, FalseLiteral).foreach { p => + checkEvaluation(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral).eval(EmptyRow)) + checkEvaluation(If(p, nullLiteral, TrueLiteral), Or(Not(p), nullLiteral).eval(EmptyRow)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala deleted file mode 100644 index 0531998f80b0..000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyIfSuite.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.optimizer - -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.BooleanType - -class SimplifyIfSuite extends PlanTest { - - object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("SimplifyIf", FixedPoint(1), SimplifyIf) :: Nil - } - - val testRelation = LocalRelation('a.int) - - test("simplify if when null is on the then branch") { - val nullLiteral = Literal(null, BooleanType) - assertEquivalent(If('a > 42, nullLiteral, false), And('a > 42, nullLiteral)) - assertEquivalent(If('a > 42, nullLiteral, true), Or('a > 42, nullLiteral)) - } - - private def assertEquivalent(e1: Expression, e2: Expression) = { - val plan = testRelation.where(e1).analyze - val actual = Optimize.execute(plan) - val expected = testRelation.where(e2).analyze - comparePlans(actual, expected) - } - -} From bb7c38cdd5f139a7cbcf04de6ed59d8fd0313bd4 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 28 Aug 2020 10:55:02 -0700 Subject: [PATCH 3/7] Fix lint --- .../org/apache/spark/sql/catalyst/optimizer/expressions.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 322619c7c1db..55238b2bc7cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} + import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} @@ -717,6 +718,7 @@ object SimplifyCasts extends Rule[LogicalPlan] { } } + /** * Removes nodes that are not necessary. */ From cc661984f3ccbf59bbabb91c7d92b17524ab74d3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 28 Aug 2020 14:02:50 -0700 Subject: [PATCH 4/7] Fix test failure --- .../catalyst/optimizer/BooleanSimplificationSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index a8b8417754b0..03d75340e31e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -221,14 +221,14 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper with test("Complementation Laws - null handling") { checkCondition('e && !'e, - testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze) + testRelationWithData.where(And(Literal(null, BooleanType), 'e.isNull)).analyze) checkCondition(!'e && 'e, - testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze) + testRelationWithData.where(And(Literal(null, BooleanType), 'e.isNull)).analyze) checkCondition('e || !'e, - testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze) + testRelationWithData.where(Or('e.isNotNull, Literal(null, BooleanType))).analyze) checkCondition(!'e || 'e, - testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze) + testRelationWithData.where(Or('e.isNotNull, Literal(null, BooleanType))).analyze) } test("Complementation Laws - negative case") { From 60e0e925957ff3e1b6e240792ad798e748662173 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sun, 30 Aug 2020 01:01:47 -0700 Subject: [PATCH 5/7] Address comments --- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 4 ++-- .../sql/catalyst/optimizer/SimplifyConditionalSuite.scala | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 55238b2bc7cc..67f7cd955ee7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -463,8 +463,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { case If(Literal(null, _), _, falseValue) => falseValue case If(cond, trueValue, falseValue) if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue - case If(p, l @ Literal(null, _), FalseLiteral) if !p.nullable => And(p, l) - case If(p, l @ Literal(null, _), TrueLiteral) if !p.nullable => Or(Not(p), l) + case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l) + case If(cond, l @ Literal(null, _), TrueLiteral) if !cond.nullable => Or(Not(cond), l) case e @ CaseWhen(branches, elseValue) if branches.exists(x => falseOrNullLiteral(x._1)) => // If there are branches that are always false, remove them. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 57195bda794d..6d62ea328ae7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -179,9 +179,9 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P } // check evaluation also - Seq(TrueLiteral, FalseLiteral).foreach { p => - checkEvaluation(If(p, nullLiteral, FalseLiteral), And(p, nullLiteral).eval(EmptyRow)) - checkEvaluation(If(p, nullLiteral, TrueLiteral), Or(Not(p), nullLiteral).eval(EmptyRow)) + Seq(TrueLiteral, FalseLiteral).foreach { b => + checkEvaluation(If(b, nullLiteral, FalseLiteral), And(b, nullLiteral).eval(EmptyRow)) + checkEvaluation(If(b, nullLiteral, TrueLiteral), Or(Not(b), nullLiteral).eval(EmptyRow)) } } } From 2b90c195a42300e981589ba5c946d95156c24342 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 31 Aug 2020 00:54:47 -0700 Subject: [PATCH 6/7] Test if evaluation with nullable condition --- .../sql/catalyst/optimizer/SimplifyConditionalSuite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 6d62ea328ae7..1d0793bc8650 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -183,5 +183,12 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P checkEvaluation(If(b, nullLiteral, FalseLiteral), And(b, nullLiteral).eval(EmptyRow)) checkEvaluation(If(b, nullLiteral, TrueLiteral), Or(Not(b), nullLiteral).eval(EmptyRow)) } + + // should have no effect on expressions with nullable if condition + assert((Factorial(5) > 100L).nullable) + Seq(TrueLiteral, FalseLiteral).foreach { b => + checkEvaluation(If(Factorial(5) > 100L, nullLiteral, b), + If(Factorial(5) > 100L, nullLiteral, b).eval(EmptyRow)) + } } } From 71bd033bfa58365ceabd108a791fd0748a27e1e0 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 31 Aug 2020 13:44:58 -0700 Subject: [PATCH 7/7] Fix indentation --- .../spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 1d0793bc8650..7a186a62dec3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -190,5 +190,5 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P checkEvaluation(If(Factorial(5) > 100L, nullLiteral, b), If(Factorial(5) > 100L, nullLiteral, b).eval(EmptyRow)) } - } + } }