From 9774605c6be7bc5f41267c37f66567a498ea9156 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Thu, 28 Jul 2016 00:34:39 -0700 Subject: [PATCH 1/7] [SPARK-16714][SQL] map, struct function should accept decimals with different precision/scale --- .../sql/catalyst/analysis/TypeCoercion.scala | 8 ++-- .../spark/sql/SQLTypeCoercionSuite.scala | 44 +++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 9a040f8644fb5..e35f3169cc0a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -49,7 +49,7 @@ object TypeCoercion { InConversion :: WidenSetOperationTypes :: PromoteStrings :: - DecimalPrecision :: + DecimalPrecision :: // DecimalPrecision must happen before ImplicitTypeCasts. BooleanEquality :: StringToIntegralCasts :: FunctionArgumentConversion :: @@ -440,7 +440,7 @@ object TypeCoercion { case a @ CreateArray(children) if !haveSameType(children) => val types = children.map(_.dataType) - findTightestCommonTypeAndPromoteToString(types) match { + findWiderCommonType(types) match { case Some(finalDataType) => CreateArray(children.map(Cast(_, finalDataType))) case None => a } @@ -451,7 +451,7 @@ object TypeCoercion { m.keys } else { val types = m.keys.map(_.dataType) - findTightestCommonTypeAndPromoteToString(types) match { + findWiderCommonType(types) match { case Some(finalDataType) => m.keys.map(Cast(_, finalDataType)) case None => m.keys } @@ -461,7 +461,7 @@ object TypeCoercion { m.values } else { val types = m.values.map(_.dataType) - findTightestCommonTypeAndPromoteToString(types) match { + findWiderCommonType(types) match { case Some(finalDataType) => m.values.map(Cast(_, finalDataType)) case None => m.values } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala new file mode 100644 index 0000000000000..0d4c14b560e51 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.math.BigDecimal + +import org.apache.spark.sql.test.SharedSQLContext + +/** + * End-to-end tests for type coercion. + */ +class SQLTypeCoercionSuite extends QueryTest with SharedSQLContext { + + test("SPARK-16714 decimal in map and struct") { + val v1 = new BigDecimal(1).divide(new BigDecimal(1000)) + val v2 = new BigDecimal(1).divide(new BigDecimal(10)).setScale(3) + + checkAnswer( + sql("select map(0.001, 0.001, 0.1, 0.1)"), + Row(Map(v1 -> v1, v2 -> v2)) + ) + + checkAnswer( + sql("select array(0.001, 0.1)"), + Row(Seq(v1, v2)) + ) + } + +} From 57bbe812c83a4108dc34ee8087128968b23000a9 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 00:50:28 -0700 Subject: [PATCH 2/7] Make it more consistent. --- .../sql/catalyst/analysis/DecimalPrecision.scala | 1 + .../spark/sql/catalyst/analysis/TypeCoercion.scala | 4 ++-- .../sql/catalyst/expressions/nullExpressions.scala | 8 ++++---- .../apache/spark/sql/SQLTypeCoercionSuite.scala | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 9c38dd2ee4e53..9944a0fa84ff7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -67,6 +67,7 @@ object DecimalPrecision extends Rule[LogicalPlan] { def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType = { widerDecimalType(d1.precision, d1.scale, d2.precision, d2.scale) } + // max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2) def widerDecimalType(p1: Int, s1: Int, p2: Int, s2: Int): DecimalType = { val scale = max(s1, s2) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index e35f3169cc0a8..a582fd15a8fb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -496,14 +496,14 @@ object TypeCoercion { case g @ Greatest(children) if !haveSameType(children) => val types = children.map(_.dataType) - findTightestCommonType(types) match { + findWiderCommonType(types) match { case Some(finalDataType) => Greatest(children.map(Cast(_, finalDataType))) case None => g } case l @ Least(children) if !haveSameType(children) => val types = children.map(_.dataType) - findTightestCommonType(types) match { + findWiderCommonType(types) match { case Some(finalDataType) => Least(children.map(Cast(_, finalDataType))) case None => l } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 1c18265e0fed4..0b4f492a118d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -96,7 +96,7 @@ case class IfNull(left: Expression, right: Expression) extends RuntimeReplaceabl override def replaceForTypeCoercion(): Expression = { if (left.dataType != right.dataType) { - TypeCoercion.findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { dtype => + TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype => copy(left = Cast(left, dtype), right = Cast(right, dtype)) }.getOrElse(this) } else { @@ -116,7 +116,7 @@ case class NullIf(left: Expression, right: Expression) extends RuntimeReplaceabl override def replaceForTypeCoercion(): Expression = { if (left.dataType != right.dataType) { - TypeCoercion.findTightestCommonTypeOfTwo(left.dataType, right.dataType).map { dtype => + TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype => copy(left = Cast(left, dtype), right = Cast(right, dtype)) }.getOrElse(this) } else { @@ -134,7 +134,7 @@ case class Nvl(left: Expression, right: Expression) extends RuntimeReplaceable { override def replaceForTypeCoercion(): Expression = { if (left.dataType != right.dataType) { - TypeCoercion.findTightestCommonTypeToString(left.dataType, right.dataType).map { dtype => + TypeCoercion.findWiderTypeForTwo(left.dataType, right.dataType).map { dtype => copy(left = Cast(left, dtype), right = Cast(right, dtype)) }.getOrElse(this) } else { @@ -154,7 +154,7 @@ case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression) override def replaceForTypeCoercion(): Expression = { if (expr2.dataType != expr3.dataType) { - TypeCoercion.findTightestCommonTypeOfTwo(expr2.dataType, expr3.dataType).map { dtype => + TypeCoercion.findWiderTypeForTwo(expr2.dataType, expr3.dataType).map { dtype => copy(expr2 = Cast(expr2, dtype), expr3 = Cast(expr3, dtype)) }.getOrElse(this) } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala index 0d4c14b560e51..0123ca846409c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala @@ -39,6 +39,20 @@ class SQLTypeCoercionSuite extends QueryTest with SharedSQLContext { sql("select array(0.001, 0.1)"), Row(Seq(v1, v2)) ) + + checkAnswer( + sql("select greatest(0.001, 0.1), least(0.001, 0.1)"), + Row(v2, v1) + ) + + checkAnswer( + sql( + """ + |select ifnull(0.001, 0.1), nullif(0.001, 0.1), nvl2(0.001, 0.001, 0.1), nvl(0.001, 0.1), + | if(true, 0.001, 0.1) + """.stripMargin), + Row(v1, v1, v1, v1, v1) + ) } } From afca0032e5f1365fdd6face07cedec44bb000e25 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 00:50:53 -0700 Subject: [PATCH 3/7] Fix compile --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index a582fd15a8fb1..533f62cbc32c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -137,7 +137,7 @@ object TypeCoercion { * i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that here we allow some * loss of precision when widening decimal and double. */ - private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { + def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { case (t1: DecimalType, t2: DecimalType) => Some(DecimalPrecision.widerDecimalType(t1, t2)) case (t: IntegralType, d: DecimalType) => From 929c39ffae1a860c39216041cc994d6e577cc6e5 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 00:55:13 -0700 Subject: [PATCH 4/7] Remove unused methods. --- .../sql/catalyst/analysis/TypeCoercion.scala | 40 ++++--------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 533f62cbc32c4..8f06a033dcee1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -99,38 +99,6 @@ object TypeCoercion { case _ => None } - /** Similar to [[findTightestCommonType]], but can promote all the way to StringType. */ - def findTightestCommonTypeToString(left: DataType, right: DataType): Option[DataType] = { - findTightestCommonTypeOfTwo(left, right).orElse((left, right) match { - case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => Some(StringType) - case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType => Some(StringType) - case _ => None - }) - } - - /** - * Similar to [[findTightestCommonType]], if can not find the TightestCommonType, try to use - * [[findTightestCommonTypeToString]] to find the TightestCommonType. - */ - private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = { - types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { - case None => None - case Some(d) => - findTightestCommonTypeToString(d, c) - }) - } - - /** - * Find the tightest common type of a set of types by continuously applying - * `findTightestCommonTypeOfTwo` on these types. - */ - private def findTightestCommonType(types: Seq[DataType]): Option[DataType] = { - types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { - case None => None - case Some(d) => findTightestCommonTypeOfTwo(d, c) - }) - } - /** * Case 2 type widening (see the classdoc comment above for TypeCoercion). * @@ -147,7 +115,13 @@ object TypeCoercion { case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) => Some(DoubleType) case _ => - findTightestCommonTypeToString(t1, t2) + findTightestCommonTypeOfTwo(t1, t2).orElse((t1, t2) match { + case (StringType, t2: AtomicType) if t2 != BinaryType && t2 != BooleanType => + Some(StringType) + case (t1: AtomicType, StringType) if t1 != BinaryType && t1 != BooleanType => + Some(StringType) + case _ => None + }) } private def findWiderCommonType(types: Seq[DataType]) = { From 071b01d8a0cabefe3f016f7de6fa91e23821c514 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 01:04:04 -0700 Subject: [PATCH 5/7] Rename test case --- .../test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala index 0123ca846409c..9d758e99d90ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLTypeCoercionSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.test.SharedSQLContext */ class SQLTypeCoercionSuite extends QueryTest with SharedSQLContext { - test("SPARK-16714 decimal in map and struct") { + test("SPARK-16714 decimal widening") { val v1 = new BigDecimal(1).divide(new BigDecimal(1000)) val v2 = new BigDecimal(1).divide(new BigDecimal(10)).setScale(3) From b9f94febaf0abbee95caab1fa7d7eed4e6748382 Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 10:09:53 -0700 Subject: [PATCH 6/7] Fix test --- .../sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 76e42d9afa4c3..50b369d15ac70 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -215,8 +215,6 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { test("check types for Greatest/Least") { for (operator <- Seq[(Seq[Expression] => Expression)](Greatest, Least)) { assertError(operator(Seq('booleanField)), "requires at least 2 arguments") - assertError(operator(Seq('intField, 'stringField)), "should all have the same type") - assertError(operator(Seq('intField, 'decimalField)), "should all have the same type") assertError(operator(Seq('mapField, 'mapField)), "does not support ordering") } } From ffd17346f48715aad09280801bffefd1eb30256b Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Fri, 29 Jul 2016 21:35:41 -0700 Subject: [PATCH 7/7] Code review --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 8f06a033dcee1..f0dbac4d00630 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -103,7 +103,7 @@ object TypeCoercion { * Case 2 type widening (see the classdoc comment above for TypeCoercion). * * i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that here we allow some - * loss of precision when widening decimal and double. + * loss of precision when widening decimal and double, and also widen all the way to string type. */ def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { case (t1: DecimalType, t2: DecimalType) =>