From c9651fd8c6b3f2e6b5017610b3335b62a7d984ee Mon Sep 17 00:00:00 2001 From: keliang Date: Tue, 26 Jul 2016 19:32:35 +0800 Subject: [PATCH 1/6] Summary: Fail to create a map contains decimal type with literals having different inferred precessions and scales JIRA_ID:SPARK-16735 Description: In Spark 2.0, we will parse float literals as decimals. However, it introduces a side-effect, which is described below. spark-sql> select map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve 'map(CAST(0.1 AS DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS DECIMAL(1,1)), CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The given values of function map should all be the same type, but they are [decimal(2,2), decimal(3,3)]; line 1 pos 7 Test: spark-sql> select map(0.1,0.01, 0.2,0.033); {0.1:0.010,0.2:0.033} Time taken: 2.448 seconds, Fetched 1 row(s) --- .../expressions/complexTypeCreator.scala | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 0ca715f42472..f0de23fe3ec8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -93,20 +93,45 @@ case class CreateMap(children: Seq[Expression]) extends Expression { if (children.size % 2 != 0) { TypeCheckResult.TypeCheckFailure(s"$prettyName expects a positive even number of arguments.") } else if (keys.map(_.dataType).distinct.length > 1) { - TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " + - "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]")) + if (keys.map(_.dataType).forall(_.isInstanceOf[DecimalType])) { + TypeCheckResult.TypeCheckSuccess + } else { + 
TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " + + "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]")) + } } else if (values.map(_.dataType).distinct.length > 1) { - TypeCheckResult.TypeCheckFailure("The given values of function map should all be the same " + - "type, but they are " + values.map(_.dataType.simpleString).mkString("[", ", ", "]")) + if (values.map(_.dataType).forall(_.isInstanceOf[DecimalType])) { + TypeCheckResult.TypeCheckSuccess + } else { + TypeCheckResult.TypeCheckFailure("The given values of function map should all be the " + + "same type, but they are " + values.map(_.dataType.simpleString).mkString("[", ", ", "]")) + } } else { TypeCheckResult.TypeCheckSuccess } } + private def checkDecimalType(colType: Seq[Expression]): DataType = { + val elementType = colType.headOption.map(_.dataType).getOrElse(NullType) + elementType match { + case _ if elementType.isInstanceOf[DecimalType] => + var tighter: DataType = elementType + colType.foreach { child => + if (elementType.asInstanceOf[DecimalType].isTighterThan(child.dataType)) { + tighter = child.dataType + } + } + + tighter + case _ => + elementType + } + } + override def dataType: DataType = { MapType( - keyType = keys.headOption.map(_.dataType).getOrElse(NullType), - valueType = values.headOption.map(_.dataType).getOrElse(NullType), + keyType = checkDecimalType(keys), + valueType = checkDecimalType(values), valueContainsNull = values.exists(_.nullable)) } From ba2560e25eebac7cfbc7e3cd65ae94ca2ceae6d4 Mon Sep 17 00:00:00 2001 From: keliang Date: Tue, 26 Jul 2016 21:31:09 +0800 Subject: [PATCH 2/6] Summary:Fail to create a map contains decimal type with literals having different inferred precessions and scales JIRA_ID:SPARK-16735 Description: In Spark 2.0, we will parse float literals as decimals. However, it introduces a side-effect, which is described below. 
spark-sql> select map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve 'map(CAST(0.1 AS DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS DECIMAL(1,1)), CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The given values of function map should all be the same type, but they are [decimal(2,2), decimal(3,3)]; line 1 pos 7 Test: spark-sql> select map(0.1,0.01, 0.2,0.033); {0.1:0.010,0.2:0.033} Time taken: 2.448 seconds, Fetched 1 row(s) --- .../sql/catalyst/expressions/ComplexTypeSuite.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 0c307b2b8576..1fb43f3515ca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -134,6 +134,16 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(CreateArray(Literal.create(null, IntegerType) :: Nil), null :: Nil) } + test("SPARK-16715: CreateMap with Decimals") { + val map1 = CreateMap(Seq(Literal(Decimal(0.02)), Literal(Decimal(0.001)), + Literal(Decimal(0.001)), Literal(Decimal(0.03)))) + + assert(map1.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + + checkEvaluation(map1, Seq(Decimal(0.020), Decimal(0.001), + Literal(Decimal(0.001)), Literal(Decimal(0.030)))) + } + test("CreateMap") { def 
interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = { keys.zip(values).flatMap { case (k, v) => Seq(k, v) } From 3fa2153dadb05762899481f8a4416741f0b4190a Mon Sep 17 00:00:00 2001 From: keliang Date: Wed, 27 Jul 2016 06:20:48 +0800 Subject: [PATCH 3/6] Summary: Fail to create a map contains decimal type with literals having different inferred precessions and scales JIRA_ID: SPARK-16735 Description: In Spark 2.0, we will parse float literals as decimals. However, it introduces a side-effect, which is described below. spark-sql> select map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve 'map(CAST(0.1 AS DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS DECIMAL(1,1)), CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The given values of function map should all be the same type, but they are [decimal(2,2), decimal(3,3)]; line 1 pos 7 Test:spark-sql> select map(0.1,0.01, 0.2,0.033); {0.1:0.010,0.2:0.033} Time taken: 2.448 seconds, Fetched 1 row(s) --- .../expressions/ComplexTypeSuite.scala | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 1fb43f3515ca..f259e69162a5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -134,26 +134,29 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(CreateArray(Literal.create(null, IntegerType) :: Nil), null :: Nil) } + private def interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = { + keys.zip(values).flatMap { case (k, v) => Seq(k, v) } + } + + private def createMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = { + // catalyst map is order-sensitive, so we 
create ListMap here to preserve the elements order. + scala.collection.immutable.ListMap(keys.zip(values): _*) + } + test("SPARK-16715: CreateMap with Decimals") { - val map1 = CreateMap(Seq(Literal(Decimal(0.02)), Literal(Decimal(0.001)), - Literal(Decimal(0.001)), Literal(Decimal(0.03)))) + val keys = Seq(0.02, 0.004) + val values = Seq(0.001, 0.5) + val keys1 = Seq(0.020, 0.004) + val values1 = Seq(0.001, 0.500) + val map1 = CreateMap(interlace(keys.map(Literal(_)), values.map(Literal(_)))) assert(map1.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) - checkEvaluation(map1, Seq(Decimal(0.020), Decimal(0.001), - Literal(Decimal(0.001)), Literal(Decimal(0.030)))) + checkEvaluation(map1, createMap(keys1, values1)) + checkEvaluation(map1, createMap(keys, values)) } test("CreateMap") { - def interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = { - keys.zip(values).flatMap { case (k, v) => Seq(k, v) } - } - - def createMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = { - // catalyst map is order-sensitive, so we create ListMap here to preserve the elements order. 
- scala.collection.immutable.ListMap(keys.zip(values): _*) - } - val intSeq = Seq(5, 10, 15, 20, 25) val longSeq = intSeq.map(_.toLong) val strSeq = intSeq.map(_.toString) From 7143ed6fed34e726594b5cf30b49bc49fb8da4e8 Mon Sep 17 00:00:00 2001 From: keliang Date: Wed, 27 Jul 2016 07:55:36 +0800 Subject: [PATCH 4/6] Summary:fix jira_id in the test("SPARK-16735: CreateMap with Decimals") JIRA_ID:no Description:fix jira_id in the test("SPARK-16735: CreateMap with Decimals") Test:no --- .../spark/sql/catalyst/expressions/ComplexTypeSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index f259e69162a5..293f3aa8d1fc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -143,7 +143,7 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { scala.collection.immutable.ListMap(keys.zip(values): _*) } - test("SPARK-16715: CreateMap with Decimals") { + test("SPARK-16735: CreateMap with Decimals") { val keys = Seq(0.02, 0.004) val values = Seq(0.001, 0.5) val keys1 = Seq(0.020, 0.004) From 86b117191a0ae2bd05298116db0419ec1aeab48f Mon Sep 17 00:00:00 2001 From: keliang Date: Thu, 28 Jul 2016 08:37:04 +0800 Subject: [PATCH 5/6] Summary:change the isTighterThan to isDecimalTypeTighterThan JIRA_ID:SPARK-16735 Description: I have checked this function, and it will not lost any precision or range ,it 's safe . and in the checkDecimalType, we just check the datatype and do not change datatype. (when keys or values contains integer type , it will pass. but still integer type) so checkInputDataTypes will return result like it done before. 
and in the case when keys or values contain integer types, I will use a new function instead of isTighterThan that does not check integer types. Test:done --- .../catalyst/expressions/complexTypeCreator.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f0de23fe3ec8..c06a66fa89b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -111,13 +111,25 @@ case class CreateMap(children: Seq[Expression]) extends Expression { } } + private def isDecimalTypeTighterThan(src: DecimalType, other: DataType): Boolean = other match { + case dt: DecimalType => + (src.precision - src.scale) <= (dt.precision - dt.scale) && src.scale <= dt.scale + case _ => false + } + + /** + * only check decimal type contains by the coltypes + * @param colType + * @return + */ private def checkDecimalType(colType: Seq[Expression]): DataType = { val elementType = colType.headOption.map(_.dataType).getOrElse(NullType) + elementType match { case _ if elementType.isInstanceOf[DecimalType] => var tighter: DataType = elementType colType.foreach { child => - if (elementType.asInstanceOf[DecimalType].isTighterThan(child.dataType)) { + if (isDecimalTypeTighterThan(tighter.asInstanceOf[DecimalType], child.dataType)) { tighter = child.dataType } } From c064621a123f3c7a3018cde48ab17e49e6f18c20 Mon Sep 17 00:00:00 2001 From: keliang Date: Wed, 3 Aug 2016 16:21:38 +0800 Subject: [PATCH 6/6] Summary:add "spark.sql.broadcastTimeout" into docs/sql-programming-guide.md JIRA_ID:SPARK-16870 Description: Default value for spark.sql.broadcastTimeout is 300s, and this property does not show in any docs of Spark.
so add "spark.sql.broadcastTimeout" into docs/sql-programming-guide.md to help people understand how to fix this timeout error when it happens. Test:done --- docs/sql-programming-guide.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index d8c8698e31d3..fd5d70259ae5 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -995,6 +995,15 @@ The following options can be used to configure the version of Hive that is used

+ + spark.sql.broadcastTimeout + 300 + +

+ Timeout in seconds for the broadcast wait time in broadcast joins +

+ +