From 87fc4ab29df10175f70aabf7859d9151308bc6a5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 11:26:08 +0800 Subject: [PATCH 1/8] [SPARK-30301][SQL] Datetimes as fields of complex types to hive string results wrong --- .../spark/sql/execution/HiveResult.scala | 6 ++-- .../spark/sql/execution/HiveResultSuite.scala | 30 ++++++++++++------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index f7f7e08462fe..f1f82088ef46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -68,8 +68,6 @@ object HiveResult { BooleanType, ByteType, ShortType, - DateType, - TimestampType, BinaryType) private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) @@ -90,6 +88,10 @@ object HiveResult { toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) }.toSeq.sorted.mkString("{", ",", "}") case (null, _) => "null" + case (d: Date, DateType) => + dateFormatter.format(DateTimeUtils.fromJavaDate(d)) + case (t: Timestamp, TimestampType) => + DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) case (s: String, StringType) => "\"" + s + "\"" case (decimal, DecimalType()) => decimal.toString case (interval: CalendarInterval, CalendarIntervalType) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 4d1bb470e4e2..bb59b12e6f35 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,25 +17,35 @@ package org.apache.spark.sql.execution -import java.sql.{Date, Timestamp} - import 
org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} class HiveResultSuite extends SharedSparkSession { import testImplicits._ test("date formatting in hive result") { - val date = "2018-12-28" - val executedPlan = Seq(Date.valueOf(date)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == date) + val dates = Seq("2018-12-28", "1582-10-13", "1582-10-14", "1582-10-15") + val df = dates.toDF("a").selectExpr("cast(a as date) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == dates) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == dates.map(x => s"[$x]")) } test("timestamp formatting in hive result") { - val timestamp = "2018-12-28 01:02:03" - val executedPlan = Seq(Timestamp.valueOf(timestamp)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == timestamp) + val timestamps = Seq( + "2018-12-28 01:02:03", + "1582-10-13 01:02:03", + "1582-10-14 01:02:03", + "1582-10-15 01:02:03") + val df = timestamps.toDF("a").selectExpr("cast(a as timestamp) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == timestamps) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == timestamps.map(x => s"[$x]")) } test("toHiveString correctly handles UDTs") { From 7e1c437563da445fd0911d274fa7792a7b7e99cb Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 13:52:24 +0800 Subject: [PATCH 2/8] unify --- .../spark/sql/execution/HiveResult.scala | 97 ++++++------------- .../sql-tests/results/csv-functions.sql.out | 2 +- 2 files changed, 30 insertions(+), 69 
deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index f1f82088ef46..c7b44a9e09e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -56,80 +56,41 @@ object HiveResult { // We need the types so we can output struct field names val types = executedPlan.output.map(_.dataType) // Reformat to match hive tab delimited output. - result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) + result.map(_.zip(types).map(e => toHiveString(e).stripPrefix("\"").stripSuffix("\""))) + .map(_.mkString("\t")) } - private val primitiveTypes = Seq( - StringType, - IntegerType, - LongType, - DoubleType, - FloatType, - BooleanType, - ByteType, - ShortType, - BinaryType) - private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) private lazy val dateFormatter = DateFormatter(zoneId) private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) - /** Hive outputs fields of structs slightly differently than top level attributes. 
*/ - private def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (d: Date, DateType) => - dateFormatter.format(DateTimeUtils.fromJavaDate(d)) - case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (interval: CalendarInterval, CalendarIntervalType) => - SQLConf.get.intervalOutputStyle match { - case SQL_STANDARD => toSqlStandardString(interval) - case ISO_8601 => toIso8601String(interval) - case MULTI_UNITS => toMultiUnitsString(interval) - } - case (other, tpe) if primitiveTypes contains tpe => other.toString - } - /** Formats a datum (based on the given data type) and returns the string representation. 
*/ def toHiveString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" - case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) - case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString - case (interval: CalendarInterval, CalendarIntervalType) => - SQLConf.get.intervalOutputStyle match { - case SQL_STANDARD => toSqlStandardString(interval) - case ISO_8601 => toIso8601String(interval) - case MULTI_UNITS => toMultiUnitsString(interval) - } - case (interval, CalendarIntervalType) => interval.toString - case (other, _ : UserDefinedType[_]) => other.toString - case (other, tpe) if primitiveTypes.contains(tpe) => other.toString - } + case (null, _) => "NULL" + case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) + case (t: Timestamp, TimestampType) => + timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) + case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) + case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString + case (n, _: NumericType) => n.toString + case (bin, BinaryType) => bin.toString + case (s: String, StringType) => "\"" + s + "\"" + case (interval: CalendarInterval, CalendarIntervalType) => + 
SQLConf.get.intervalOutputStyle match { + case SQL_STANDARD => toSqlStandardString(interval) + case ISO_8601 => toIso8601String(interval) + case MULTI_UNITS => toMultiUnitsString(interval) + } + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveString).mkString("[", ",", "]") + case (map: Map[_, _], MapType(kType, vType, _)) => + map.map { case (key, value) => + toHiveString((key, kType)) + ":" + toHiveString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { + case (v, t) => s""""${t.name}":${toHiveString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (other, _ : UserDefinedType[_]) => other.toString + } } diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 03d4bfffa892..6cae948f1b4b 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -15,7 +15,7 @@ select from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yy -- !query 1 schema struct> -- !query 1 output -{"time":2015-08-26 00:00:00.0} +{"time":2015-08-26 00:00:00} -- !query 2 From 5b8bec344529e4705d5e7f256ce22e4539cc6d54 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 14:15:41 +0800 Subject: [PATCH 3/8] nit --- .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index c7b44a9e09e9..15e9ffb2e4a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -73,7 +73,6 @@ object HiveResult { case (bin: Array[Byte], BinaryType) => new String(bin, 
StandardCharsets.UTF_8) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString - case (bin, BinaryType) => bin.toString case (s: String, StringType) => "\"" + s + "\"" case (interval: CalendarInterval, CalendarIntervalType) => SQLConf.get.intervalOutputStyle match { From bb77e3c920eb62bbdb379d4974f55c8a9860e8b4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 16:01:10 +0800 Subject: [PATCH 4/8] fix tests --- .../spark/sql/execution/HiveResult.scala | 53 ++++++++++--------- .../resources/sql-tests/results/array.sql.out | 4 +- .../sql-tests/results/json-functions.sql.out | 6 +-- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 15e9ffb2e4a1..886dd9af1a71 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -66,30 +66,31 @@ object HiveResult { /** Formats a datum (based on the given data type) and returns the string representation. 
*/ def toHiveString(a: (Any, DataType)): String = a match { - case (null, _) => "NULL" - case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) - case (t: Timestamp, TimestampType) => - timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString - case (n, _: NumericType) => n.toString - case (s: String, StringType) => "\"" + s + "\"" - case (interval: CalendarInterval, CalendarIntervalType) => - SQLConf.get.intervalOutputStyle match { - case SQL_STANDARD => toSqlStandardString(interval) - case ISO_8601 => toIso8601String(interval) - case MULTI_UNITS => toMultiUnitsString(interval) - } - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { case (key, value) => - toHiveString((key, kType)) + ":" + toHiveString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (other, _ : UserDefinedType[_]) => other.toString - } + case (null, _) => "NULL" + case (b, BooleanType) => b.toString + case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) + case (t: Timestamp, TimestampType) => + timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) + case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) + case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString + case (n, _: NumericType) => n.toString + case (s: String, StringType) => "\"" + s + "\"" + case (interval: CalendarInterval, CalendarIntervalType) => + SQLConf.get.intervalOutputStyle match { + case SQL_STANDARD => toSqlStandardString(interval) + case ISO_8601 => 
toIso8601String(interval) + case MULTI_UNITS => toMultiUnitsString(interval) + } + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(toHiveString).mkString("[", ",", "]") + case (m: Map[_, _], MapType(kType, vType, _)) => + m.map { case (key, value) => + toHiveString((key, kType)) + ":" + toHiveString((value, vType)) + }.toSeq.sorted.mkString("{", ",", "}") + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { case (v, t) => + s""""${t.name}":${toHiveString((v, t.dataType))}""" + }.mkString("{", ",", "}") + case (other, _: UserDefinedType[_]) => other.toString + } } diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 4195205e275a..31987de5da9b 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -75,7 +75,7 @@ select * from primitive_arrays -- !query 5 schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> -- !query 5 output -[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0] +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00,2016-11-12 20:54:00] -- !query 6 @@ -122,7 +122,7 @@ from primitive_arrays -- !query 8 schema struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> -- !query 8 output -[true] [1,2] [1,2] [1,2] [1,2] 
[9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0] +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00,2016-11-15 20:54:00] -- !query 9 diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index ca0cd90d94fa..70a2445a8d4b 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -106,7 +106,7 @@ select from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat -- !query 12 schema struct> -- !query 12 output -{"time":2015-08-26 00:00:00.0} +{"time":2015-08-26 00:00:00} -- !query 13 @@ -245,7 +245,7 @@ select from_json('[1, 2, null]', 'array') -- !query 28 schema struct> -- !query 28 output -[1,2,null] +[1,2,NULL] -- !query 29 @@ -269,7 +269,7 @@ select from_json('[null, {"a":2}]', 'array>') -- !query 31 schema struct>> -- !query 31 output -[null,{"a":2}] +[NULL,{"a":2}] -- !query 32 From 1d4a9bfe2abcdb6ca0ede1b3881e9cd9f5d00ba5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 20:06:42 +0800 Subject: [PATCH 5/8] fix tests --- .../spark/sql/catalyst/expressions/collectionOperations.scala | 2 +- .../sql-tests/results/ansi/higher-order-functions.sql.out | 4 ++-- .../sql-tests/results/higher-order-functions.sql.out | 4 ++-- .../test/resources/sql-tests/results/udf/udf-union.sql.out | 4 ++-- sql/core/src/test/resources/sql-tests/results/union.sql.out | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 6ed68e47ce7a..a2ce55ccf326 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -3043,7 +3043,7 @@ trait ArraySetLike { examples = """ Examples: > SELECT _FUNC_(array(1, 2, 3, null, 3)); - [1,2,3,null] + [1,2,3,NULL] """, since = "2.4.0") case class ArrayDistinct(child: Expression) extends UnaryExpression with ArraySetLike with ExpectsInputTypes { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out index e7dae6595a89..20689778d622 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out @@ -171,7 +171,7 @@ select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested struct> -- !query 17 output [13] -[34,99,null] +[34,99,NULL] [80,-74] @@ -188,7 +188,7 @@ select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as -- !query 19 schema struct> -- !query 19 output -["a",null,"f"] +["a",NULL,"f"] -- !query 20 diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out index ca1d747874c5..0710451a5b88 100644 --- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out @@ -171,7 +171,7 @@ select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested struct> -- !query 17 output [13] -[34,99,null] +[34,99,NULL] [80,-74] @@ -188,7 +188,7 @@ select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as -- !query 19 schema struct> -- !query 19 output -["a",null,"f"] +["a",NULL,"f"] -- !query 20 diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out index 835abe3d3277..846d61b0b136 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out @@ -111,7 +111,7 @@ SELECT map(1, 2, 3, NULL), udf(1) -- !query 9 schema struct,str:string> -- !query 9 output -{1:2,3:null} 1 +{1:2,3:NULL} 1 {1:2} str @@ -122,7 +122,7 @@ SELECT array(1, 2, 3, NULL), udf(1) -- !query 10 schema struct,str:string> -- !query 10 output -[1,2,3,null] 1 +[1,2,3,NULL] 1 [1,2] str diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out index 95a10f0b0a8c..c62635a5dc7d 100644 --- a/sql/core/src/test/resources/sql-tests/results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out @@ -111,7 +111,7 @@ SELECT map(1, 2, 3, NULL), 1 -- !query 9 schema struct,str:string> -- !query 9 output -{1:2,3:null} 1 +{1:2,3:NULL} 1 {1:2} str @@ -122,7 +122,7 @@ SELECT array(1, 2, 3, NULL), 1 -- !query 10 schema struct,str:string> -- !query 10 output -[1,2,3,null] 1 +[1,2,3,NULL] 1 [1,2] str From e4bcc8cc1cef827709d0eb67b42e7aab812aca96 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Dec 2019 23:48:05 +0800 Subject: [PATCH 6/8] fix hive compatibility --- .../expressions/collectionOperations.scala | 2 +- .../apache/spark/sql/execution/HiveResult.scala | 14 +++++++------- .../results/ansi/higher-order-functions.sql.out | 4 ++-- .../results/higher-order-functions.sql.out | 4 ++-- .../sql-tests/results/json-functions.sql.out | 4 ++-- .../sql-tests/results/udf/udf-union.sql.out | 4 ++-- .../test/resources/sql-tests/results/union.sql.out | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index a2ce55ccf326..6ed68e47ce7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -3043,7 +3043,7 @@ trait ArraySetLike { examples = """ Examples: > SELECT _FUNC_(array(1, 2, 3, null, 3)); - [1,2,3,NULL] + [1,2,3,null] """, since = "2.4.0") case class ArrayDistinct(child: Expression) extends UnaryExpression with ArraySetLike with ExpectsInputTypes { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 886dd9af1a71..c92b10cc0364 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -56,7 +56,7 @@ object HiveResult { // We need the types so we can output struct field names val types = executedPlan.output.map(_.dataType) // Reformat to match hive tab delimited output. - result.map(_.zip(types).map(e => toHiveString(e).stripPrefix("\"").stripSuffix("\""))) + result.map(_.zip(types).map(e => toHiveString(e))) .map(_.mkString("\t")) } @@ -65,8 +65,8 @@ object HiveResult { private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) /** Formats a datum (based on the given data type) and returns the string representation. 
*/ - def toHiveString(a: (Any, DataType)): String = a match { - case (null, _) => "NULL" + def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match { + case (null, _) => if (nested) "null" else "NULL" case (b, BooleanType) => b.toString case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) case (t: Timestamp, TimestampType) => @@ -74,7 +74,7 @@ object HiveResult { case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString - case (s: String, StringType) => "\"" + s + "\"" + case (s: String, StringType) => if (nested) "\"" + s + "\"" else s case (interval: CalendarInterval, CalendarIntervalType) => SQLConf.get.intervalOutputStyle match { case SQL_STANDARD => toSqlStandardString(interval) @@ -82,14 +82,14 @@ object HiveResult { case MULTI_UNITS => toMultiUnitsString(interval) } case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveString).mkString("[", ",", "]") + seq.map(v => (v, typ)).map(e => toHiveString(e, true)).mkString("[", ",", "]") case (m: Map[_, _], MapType(kType, vType, _)) => m.map { case (key, value) => - toHiveString((key, kType)) + ":" + toHiveString((value, vType)) + toHiveString((key, kType), true) + ":" + toHiveString((value, vType), true) }.toSeq.sorted.mkString("{", ",", "}") case (struct: Row, StructType(fields)) => struct.toSeq.zip(fields).map { case (v, t) => - s""""${t.name}":${toHiveString((v, t.dataType))}""" + s""""${t.name}":${toHiveString((v, t.dataType), true)}""" }.mkString("{", ",", "}") case (other, _: UserDefinedType[_]) => other.toString } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out index 20689778d622..e7dae6595a89 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out @@ -171,7 +171,7 @@ select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested struct> -- !query 17 output [13] -[34,99,NULL] +[34,99,null] [80,-74] @@ -188,7 +188,7 @@ select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as -- !query 19 schema struct> -- !query 19 output -["a",NULL,"f"] +["a",null,"f"] -- !query 20 diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out index 0710451a5b88..ca1d747874c5 100644 --- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out @@ -171,7 +171,7 @@ select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested struct> -- !query 17 output [13] -[34,99,NULL] +[34,99,null] [80,-74] @@ -188,7 +188,7 @@ select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as -- !query 19 schema struct> -- !query 19 output -["a",NULL,"f"] +["a",null,"f"] -- !query 20 diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 70a2445a8d4b..1f6e5e78ca22 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -245,7 +245,7 @@ select from_json('[1, 2, null]', 'array') -- !query 28 schema struct> -- !query 28 output -[1,2,NULL] +[1,2,null] -- !query 29 @@ -269,7 +269,7 @@ select from_json('[null, {"a":2}]', 'array>') -- !query 31 schema struct>> -- !query 31 output -[NULL,{"a":2}] +[null,{"a":2}] -- !query 32 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out index 846d61b0b136..835abe3d3277 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out @@ -111,7 +111,7 @@ SELECT map(1, 2, 3, NULL), udf(1) -- !query 9 schema struct,str:string> -- !query 9 output -{1:2,3:NULL} 1 +{1:2,3:null} 1 {1:2} str @@ -122,7 +122,7 @@ SELECT array(1, 2, 3, NULL), udf(1) -- !query 10 schema struct,str:string> -- !query 10 output -[1,2,3,NULL] 1 +[1,2,3,null] 1 [1,2] str diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out index c62635a5dc7d..95a10f0b0a8c 100644 --- a/sql/core/src/test/resources/sql-tests/results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out @@ -111,7 +111,7 @@ SELECT map(1, 2, 3, NULL), 1 -- !query 9 schema struct,str:string> -- !query 9 output -{1:2,3:NULL} 1 +{1:2,3:null} 1 {1:2} str @@ -122,7 +122,7 @@ SELECT array(1, 2, 3, NULL), 1 -- !query 10 schema struct,str:string> -- !query 10 output -[1,2,3,NULL] 1 +[1,2,3,null] 1 [1,2] str From 32383e66182ca7beaeeb626a9a44b737b5ee1ff6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 20 Dec 2019 00:38:30 +0800 Subject: [PATCH 7/8] regen all g files --- .../src/test/resources/sql-tests/results/inline-table.sql.out | 2 +- .../sql-tests/results/typeCoercion/native/concat.sql.out | 2 +- .../sql-tests/results/typeCoercion/native/mapZipWith.sql.out | 2 +- .../sql-tests/results/typeCoercion/native/mapconcat.sql.out | 2 +- .../resources/sql-tests/results/udf/udf-inline-table.sql.out | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 4e80f0bda551..bf5a560ec072 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -150,4 +150,4 @@ select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991- -- !query 16 schema struct> -- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out index 6c6d3110d7d0..d7ebc3c77ed5 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out @@ -298,7 +298,7 @@ FROM various_arrays -- !query 12 schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,data_array:array,timestamp_array:array,string_array:array,array_array:array>,struct_array:array>,map_array:array>> -- !query 12 output -[true,false,true] [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0,2016-11-11 20:54:00.0] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] [{"a":1},{"b":2},{"c":3},{"d":4}] +[true,false,true] [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00,2016-11-12 20:54:00,2016-11-11 20:54:00] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] 
[{"a":1},{"b":2},{"c":3},{"d":4}] -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out index 86a578ca013d..2fdaf63cd3bf 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out @@ -149,7 +149,7 @@ FROM various_maps -- !query 12 schema struct>> -- !query 12 output -{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00.0,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} +{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out index 79e00860e4c0..ee6d62b48388 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out @@ -65,7 +65,7 @@ FROM various_maps -- !query 1 schema struct,tinyint_map:map,smallint_map:map,int_map:map,bigint_map:map,decimal_map:map,float_map:map,double_map:map,date_map:map,timestamp_map:map,string_map:map,array_map:map,array>,struct_map:map,struct>,string_int_map:map,int_string_map:map> -- !query 1 output -{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00.0:2016-11-09 20:54:00.0,2016-11-15 20:54:00.0:2016-11-12 20:54:00.0} {"a":"b","c":"d"} 
{["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} +{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00:2016-11-09 20:54:00,2016-11-15 20:54:00:2016-11-12 20:54:00} {"a":"b","c":"d"} {["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} -- !query 2 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out index 2cf24e50c80a..9203c2b31dc1 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out @@ -150,4 +150,4 @@ select udf(a), b from values (timestamp('1991-12-06 00:00:00.0'), array(timestam -- !query 16 schema struct> -- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] From 8310be0a9a13313e2ee1db828879847e68c44e38 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 20 Dec 2019 10:07:44 +0800 Subject: [PATCH 8/8] fix examples --- .../apache/spark/sql/catalyst/expressions/csvExpressions.scala | 2 +- .../apache/spark/sql/catalyst/expressions/jsonExpressions.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 67c24f687af0..73d329b4f582 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -42,7 +42,7 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_('1, 0.8', 'a INT, b DOUBLE'); {"a":1,"b":0.8} > SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); - {"time":2015-08-26 00:00:00.0} + {"time":2015-08-26 00:00:00} """, since = "3.0.0") // scalastyle:on line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index a1e3a84bd045..3c08d866444d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -505,7 +505,7 @@ case class JsonTuple(children: Seq[Expression]) > SELECT _FUNC_('{"a":1, "b":0.8}', 'a INT, b DOUBLE'); {"a":1,"b":0.8} > SELECT _FUNC_('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); - {"time":2015-08-26 00:00:00.0} + {"time":2015-08-26 00:00:00} """, since = "2.2.0") // scalastyle:on line.size.limit