diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 67c24f687af0..73d329b4f582 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -42,7 +42,7 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_('1, 0.8', 'a INT, b DOUBLE'); {"a":1,"b":0.8} > SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); - {"time":2015-08-26 00:00:00.0} + {"time":2015-08-26 00:00:00} """, since = "3.0.0") // scalastyle:on line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index a1e3a84bd045..3c08d866444d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -505,7 +505,7 @@ case class JsonTuple(children: Seq[Expression]) > SELECT _FUNC_('{"a":1, "b":0.8}', 'a INT, b DOUBLE'); {"a":1,"b":0.8} > SELECT _FUNC_('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); - {"time":2015-08-26 00:00:00.0} + {"time":2015-08-26 00:00:00} """, since = "2.2.0") // scalastyle:on line.size.limit diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index f7f7e08462fe..c92b10cc0364 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -56,78 +56,41 @@ object HiveResult { // We need the types so we can output struct field names val types = executedPlan.output.map(_.dataType) // Reformat to match hive tab delimited output. - result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) + result.map(_.zip(types).map(e => toHiveString(e))) + .map(_.mkString("\t")) } - private val primitiveTypes = Seq( - StringType, - IntegerType, - LongType, - DoubleType, - FloatType, - BooleanType, - ByteType, - ShortType, - DateType, - TimestampType, - BinaryType) - private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) private lazy val dateFormatter = DateFormatter(zoneId) private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) - /** Hive outputs fields of structs slightly differently than top level attributes. */ - private def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (interval: CalendarInterval, CalendarIntervalType) => - SQLConf.get.intervalOutputStyle match { - case SQL_STANDARD => toSqlStandardString(interval) - case ISO_8601 => toIso8601String(interval) - case MULTI_UNITS => toMultiUnitsString(interval) - } - case (other, tpe) if primitiveTypes contains tpe => other.toString - } - /** Formats a datum (based on the given data type) and returns the string representation. */ - def toHiveString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" + def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match { + case (null, _) => if (nested) "null" else "NULL" + case (b, BooleanType) => b.toString case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) + timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString + case (n, _: NumericType) => n.toString + case (s: String, StringType) => if (nested) "\"" + s + "\"" else s case (interval: CalendarInterval, CalendarIntervalType) => SQLConf.get.intervalOutputStyle match { case SQL_STANDARD => toSqlStandardString(interval) case ISO_8601 => toIso8601String(interval) case MULTI_UNITS => toMultiUnitsString(interval) } - case (interval, CalendarIntervalType) => interval.toString - case (other, _ : UserDefinedType[_]) => other.toString - case (other, tpe) if primitiveTypes.contains(tpe) => other.toString + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(e => toHiveString(e, true)).mkString("[", ",", "]") + case (m: Map[_, _], MapType(kType, vType, _)) => + m.map { case (key, value) => + toHiveString((key, kType), true) + ":" + toHiveString((value, vType), true) + }.toSeq.sorted.mkString("{", ",", "}") + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { case (v, t) => + s""""${t.name}":${toHiveString((v, t.dataType), true)}""" + }.mkString("{", ",", "}") + case (other, _: UserDefinedType[_]) => other.toString } } diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 4195205e275a..31987de5da9b 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -75,7 +75,7 @@ select * from primitive_arrays -- !query 5 schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> -- !query 5 output -[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0] +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00,2016-11-12 20:54:00] -- !query 6 @@ -122,7 +122,7 @@ from primitive_arrays -- !query 8 schema struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> -- !query 8 output -[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0] +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00,2016-11-15 20:54:00] -- !query 9 diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 03d4bfffa892..6cae948f1b4b 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -15,7 +15,7 @@ select from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yy -- !query 1 schema struct> -- !query 1 output -{"time":2015-08-26 00:00:00.0} +{"time":2015-08-26 00:00:00} -- !query 2 diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 4e80f0bda551..bf5a560ec072 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -150,4 +150,4 @@ select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991- -- !query 16 schema struct> -- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index ca0cd90d94fa..1f6e5e78ca22 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -106,7 +106,7 @@ select from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat -- !query 12 schema struct> -- !query 12 output -{"time":2015-08-26 00:00:00.0} +{"time":2015-08-26 00:00:00} -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out index 6c6d3110d7d0..d7ebc3c77ed5 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out @@ -298,7 +298,7 @@ FROM various_arrays -- !query 12 schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,data_array:array,timestamp_array:array,string_array:array,array_array:array>,struct_array:array>,map_array:array>> -- !query 12 output -[true,false,true] [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0,2016-11-11 20:54:00.0] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] [{"a":1},{"b":2},{"c":3},{"d":4}] +[true,false,true] [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00,2016-11-12 20:54:00,2016-11-11 20:54:00] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] [{"a":1},{"b":2},{"c":3},{"d":4}] -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out index 86a578ca013d..2fdaf63cd3bf 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out @@ -149,7 +149,7 @@ FROM various_maps -- !query 12 schema struct>> -- !query 12 output -{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00.0,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} +{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out index 79e00860e4c0..ee6d62b48388 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out @@ -65,7 +65,7 @@ FROM various_maps -- !query 1 schema struct,tinyint_map:map,smallint_map:map,int_map:map,bigint_map:map,decimal_map:map,float_map:map,double_map:map,date_map:map,timestamp_map:map,string_map:map,array_map:map,array>,struct_map:map,struct>,string_int_map:map,int_string_map:map> -- !query 1 output -{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00.0:2016-11-09 20:54:00.0,2016-11-15 20:54:00.0:2016-11-12 20:54:00.0} {"a":"b","c":"d"} {["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} +{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00:2016-11-09 20:54:00,2016-11-15 20:54:00:2016-11-12 20:54:00} {"a":"b","c":"d"} {["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} -- !query 2 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out index 2cf24e50c80a..9203c2b31dc1 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out @@ -150,4 +150,4 @@ select udf(a), b from values (timestamp('1991-12-06 00:00:00.0'), array(timestam -- !query 16 schema struct> -- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 4d1bb470e4e2..bb59b12e6f35 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,25 +17,35 @@ package org.apache.spark.sql.execution -import java.sql.{Date, Timestamp} - import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} class HiveResultSuite extends SharedSparkSession { import testImplicits._ test("date formatting in hive result") { - val date = "2018-12-28" - val executedPlan = Seq(Date.valueOf(date)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == date) + val dates = Seq("2018-12-28", "1582-10-13", "1582-10-14", "1582-10-15") + val df = dates.toDF("a").selectExpr("cast(a as date) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == dates) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == dates.map(x => s"[$x]")) } test("timestamp formatting in hive result") { - val timestamp = "2018-12-28 01:02:03" - val executedPlan = Seq(Timestamp.valueOf(timestamp)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == timestamp) + val timestamps = Seq( + "2018-12-28 01:02:03", + "1582-10-13 01:02:03", + "1582-10-14 01:02:03", + "1582-10-15 01:02:03") + val df = timestamps.toDF("a").selectExpr("cast(a as timestamp) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == timestamps) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == timestamps.map(x => s"[$x]")) } test("toHiveString correctly handles UDTs") {