From c9651fd8c6b3f2e6b5017610b3335b62a7d984ee Mon Sep 17 00:00:00 2001
From: keliang
Date: Tue, 26 Jul 2016 19:32:35 +0800
Subject: [PATCH 1/6] Summary: Fail to create a map containing decimal type with
literals having different inferred precisions and scales JIRA_ID:SPARK-16735
Description: In Spark 2.0, we will parse float literals as decimals. However,
it introduces a side-effect, which is described below. spark-sql> select
map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve 'map(CAST(0.1 AS
DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS DECIMAL(1,1)),
CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The given values of
function map should all be the same type, but they are [decimal(2,2),
decimal(3,3)]; line 1 pos 7 Test: spark-sql> select map(0.1,0.01, 0.2,0.033);
{0.1:0.010,0.2:0.033} Time taken: 2.448 seconds, Fetched 1 row(s)
---
.../expressions/complexTypeCreator.scala | 37 ++++++++++++++++---
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index 0ca715f42472..f0de23fe3ec8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -93,20 +93,45 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
if (children.size % 2 != 0) {
TypeCheckResult.TypeCheckFailure(s"$prettyName expects a positive even number of arguments.")
} else if (keys.map(_.dataType).distinct.length > 1) {
- TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " +
- "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ if (keys.map(_.dataType).forall(_.isInstanceOf[DecimalType])) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " +
+ "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ }
} else if (values.map(_.dataType).distinct.length > 1) {
- TypeCheckResult.TypeCheckFailure("The given values of function map should all be the same " +
- "type, but they are " + values.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ if (values.map(_.dataType).forall(_.isInstanceOf[DecimalType])) {
+ TypeCheckResult.TypeCheckSuccess
+ } else {
+ TypeCheckResult.TypeCheckFailure("The given values of function map should all be the " +
+ "same type, but they are " + values.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ }
} else {
TypeCheckResult.TypeCheckSuccess
}
}
+ private def checkDecimalType(colType: Seq[Expression]): DataType = {
+ val elementType = colType.headOption.map(_.dataType).getOrElse(NullType)
+ elementType match {
+ case _ if elementType.isInstanceOf[DecimalType] =>
+ var tighter: DataType = elementType
+ colType.foreach { child =>
+ if (elementType.asInstanceOf[DecimalType].isTighterThan(child.dataType)) {
+ tighter = child.dataType
+ }
+ }
+
+ tighter
+ case _ =>
+ elementType
+ }
+ }
+
override def dataType: DataType = {
MapType(
- keyType = keys.headOption.map(_.dataType).getOrElse(NullType),
- valueType = values.headOption.map(_.dataType).getOrElse(NullType),
+ keyType = checkDecimalType(keys),
+ valueType = checkDecimalType(values),
valueContainsNull = values.exists(_.nullable))
}
From ba2560e25eebac7cfbc7e3cd65ae94ca2ceae6d4 Mon Sep 17 00:00:00 2001
From: keliang
Date: Tue, 26 Jul 2016 21:31:09 +0800
Subject: [PATCH 2/6] Summary: Fail to create a map containing decimal type with
literals having different inferred precisions and scales JIRA_ID:SPARK-16735
Description: In Spark 2.0, we will parse float literals as decimals. However,
it introduces a side-effect, which is described below. spark-sql> select
map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve 'map(CAST(0.1 AS
DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS DECIMAL(1,1)),
CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The given values of
function map should all be the same type, but they are [decimal(2,2),
decimal(3,3)]; line 1 pos 7 Test: spark-sql> select map(0.1,0.01, 0.2,0.033);
{0.1:0.010,0.2:0.033} Time taken: 2.448 seconds, Fetched 1 row(s)
---
.../sql/catalyst/expressions/ComplexTypeSuite.scala | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index 0c307b2b8576..1fb43f3515ca 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue
+import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
@@ -134,6 +134,16 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(CreateArray(Literal.create(null, IntegerType) :: Nil), null :: Nil)
}
+ test("SPARK-16715: CreateMap with Decimals") {
+ val map1 = CreateMap(Seq(Literal(Decimal(0.02)), Literal(Decimal(0.001)),
+ Literal(Decimal(0.001)), Literal(Decimal(0.03))))
+
+ assert(map1.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess)
+
+ checkEvaluation(map1, Seq(Decimal(0.020), Decimal(0.001),
+ Literal(Decimal(0.001)), Literal(Decimal(0.030))))
+ }
+
test("CreateMap") {
def interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = {
keys.zip(values).flatMap { case (k, v) => Seq(k, v) }
From 3fa2153dadb05762899481f8a4416741f0b4190a Mon Sep 17 00:00:00 2001
From: keliang
Date: Wed, 27 Jul 2016 06:20:48 +0800
Subject: [PATCH 3/6] Summary: Fail to create a map containing decimal type with
literals having different inferred precisions and scales JIRA_ID:
SPARK-16735 Description: In Spark 2.0, we will parse float literals as
decimals. However, it introduces a side-effect, which is described below.
spark-sql> select map(0.1,0.01, 0.2,0.033); Error in query: cannot resolve
'map(CAST(0.1 AS DECIMAL(1,1)), CAST(0.01 AS DECIMAL(2,2)), CAST(0.2 AS
DECIMAL(1,1)), CAST(0.033 AS DECIMAL(3,3)))' due to data type mismatch: The
given values of function map should all be the same type, but they are
[decimal(2,2), decimal(3,3)]; line 1 pos 7 Test:spark-sql> select
map(0.1,0.01, 0.2,0.033); {0.1:0.010,0.2:0.033} Time taken: 2.448 seconds,
Fetched 1 row(s)
---
.../expressions/ComplexTypeSuite.scala | 29 ++++++++++---------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index 1fb43f3515ca..f259e69162a5 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -134,26 +134,29 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(CreateArray(Literal.create(null, IntegerType) :: Nil), null :: Nil)
}
+ private def interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = {
+ keys.zip(values).flatMap { case (k, v) => Seq(k, v) }
+ }
+
+ private def createMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = {
+ // catalyst map is order-sensitive, so we create ListMap here to preserve the elements order.
+ scala.collection.immutable.ListMap(keys.zip(values): _*)
+ }
+
test("SPARK-16715: CreateMap with Decimals") {
- val map1 = CreateMap(Seq(Literal(Decimal(0.02)), Literal(Decimal(0.001)),
- Literal(Decimal(0.001)), Literal(Decimal(0.03))))
+ val keys = Seq(0.02, 0.004)
+ val values = Seq(0.001, 0.5)
+ val keys1 = Seq(0.020, 0.004)
+ val values1 = Seq(0.001, 0.500)
+ val map1 = CreateMap(interlace(keys.map(Literal(_)), values.map(Literal(_))))
assert(map1.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess)
- checkEvaluation(map1, Seq(Decimal(0.020), Decimal(0.001),
- Literal(Decimal(0.001)), Literal(Decimal(0.030))))
+ checkEvaluation(map1, createMap(keys1, values1))
+ checkEvaluation(map1, createMap(keys, values))
}
test("CreateMap") {
- def interlace(keys: Seq[Literal], values: Seq[Literal]): Seq[Literal] = {
- keys.zip(values).flatMap { case (k, v) => Seq(k, v) }
- }
-
- def createMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = {
- // catalyst map is order-sensitive, so we create ListMap here to preserve the elements order.
- scala.collection.immutable.ListMap(keys.zip(values): _*)
- }
-
val intSeq = Seq(5, 10, 15, 20, 25)
val longSeq = intSeq.map(_.toLong)
val strSeq = intSeq.map(_.toString)
From 7143ed6fed34e726594b5cf30b49bc49fb8da4e8 Mon Sep 17 00:00:00 2001
From: keliang
Date: Wed, 27 Jul 2016 07:55:36 +0800
Subject: [PATCH 4/6] Summary:fix jira_id in the test("SPARK-16735: CreateMap
with Decimals") JIRA_ID:no Description:fix jira_id in the test("SPARK-16735:
CreateMap with Decimals") Test:no
---
.../spark/sql/catalyst/expressions/ComplexTypeSuite.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index f259e69162a5..293f3aa8d1fc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -143,7 +143,7 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
scala.collection.immutable.ListMap(keys.zip(values): _*)
}
- test("SPARK-16715: CreateMap with Decimals") {
+ test("SPARK-16735: CreateMap with Decimals") {
val keys = Seq(0.02, 0.004)
val values = Seq(0.001, 0.5)
val keys1 = Seq(0.020, 0.004)
From 86b117191a0ae2bd05298116db0419ec1aeab48f Mon Sep 17 00:00:00 2001
From: keliang
Date: Thu, 28 Jul 2016 08:37:04 +0800
Subject: [PATCH 5/6] Summary:change the isTighterThan to
isDecimalTypeTighterThan JIRA_ID:SPARK-16735 Description: I have checked this
function, and it will not lose any precision or range, so it is safe. In
checkDecimalType we only inspect the data type and do not change it (when keys
or values contain an integer type, the check passes but the type stays
integer), so checkInputDataTypes returns the same result as before. For the
case when keys or values contain an integer type, I use a new function
instead of isTighterThan that does not accept integer types. Test:done
---
.../catalyst/expressions/complexTypeCreator.scala | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index f0de23fe3ec8..c06a66fa89b1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -111,13 +111,25 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
}
}
+ private def isDecimalTypeTighterThan(src: DecimalType, other: DataType): Boolean = other match {
+ case dt: DecimalType =>
+ (src.precision - src.scale) <= (dt.precision - dt.scale) && src.scale <= dt.scale
+ case _ => false
+ }
+
+ /**
+ * only check decimal type contains by the coltypes
+ * @param colType
+ * @return
+ */
private def checkDecimalType(colType: Seq[Expression]): DataType = {
val elementType = colType.headOption.map(_.dataType).getOrElse(NullType)
+
elementType match {
case _ if elementType.isInstanceOf[DecimalType] =>
var tighter: DataType = elementType
colType.foreach { child =>
- if (elementType.asInstanceOf[DecimalType].isTighterThan(child.dataType)) {
+ if (isDecimalTypeTighterThan(tighter.asInstanceOf[DecimalType], child.dataType)) {
tighter = child.dataType
}
}
From c064621a123f3c7a3018cde48ab17e49e6f18c20 Mon Sep 17 00:00:00 2001
From: keliang
Date: Wed, 3 Aug 2016 16:21:38 +0800
Subject: [PATCH 6/6] Summary:add "spark.sql.broadcastTimeout" into
docs/sql-programming-guide.md JIRA_ID:SPARK-16870 Description: The default
value for spark.sql.broadcastTimeout is 300s, and this property does not
appear in any Spark docs, so add "spark.sql.broadcastTimeout" to
docs/sql-programming-guide.md to help people fix this timeout error
when it happens. Test:done
---
docs/sql-programming-guide.md | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index d8c8698e31d3..fd5d70259ae5 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -995,6 +995,15 @@ The following options can be used to configure the version of Hive that is used
+
+ spark.sql.broadcastTimeout |
+ 300 |
+
+
+ Timeout in seconds for the broadcast wait time in broadcast joins
+
+ |
+