From 254536615bf8ea773c9d50d9f2ace08e1930ab90 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2020 22:28:09 +0300 Subject: [PATCH 01/12] Support formatting java.sql.Timestamp and java.time.Instant directly --- .../catalyst/util/TimestampFormatter.scala | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index dc06fa9d6f1c..5982e6f9aaa2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -50,7 +50,10 @@ sealed trait TimestampFormatter extends Serializable { @throws(classOf[DateTimeParseException]) @throws(classOf[DateTimeException]) def parse(s: String): Long + def format(us: Long): String + def format(ts: Timestamp): String + def format(instant: Instant): String } class Iso8601TimestampFormatter( @@ -84,9 +87,17 @@ class Iso8601TimestampFormatter( } } + override def format(instant: Instant): String = { + formatter.withZone(zoneId).format(instant) + } + override def format(us: Long): String = { val instant = DateTimeUtils.microsToInstant(us) - formatter.withZone(zoneId).format(instant) + format(instant) + } + + override def format(ts: Timestamp): String = { + legacyFormatter.format(ts) } } @@ -149,7 +160,7 @@ class LegacyFastTimestampFormatter( fastDateFormat.getTimeZone, fastDateFormat.getPattern.count(_ == 'S')) - def parse(s: String): SQLTimestamp = { + override def parse(s: String): SQLTimestamp = { cal.clear() // Clear the calendar because it can be re-used many times if (!fastDateFormat.parse(s, new ParsePosition(0), cal)) { throw new IllegalArgumentException(s"'$s' is an invalid timestamp") @@ -160,12 +171,20 @@ class LegacyFastTimestampFormatter( rebaseJulianToGregorianMicros(julianMicros) } - def format(timestamp: SQLTimestamp): String = { + override def format(timestamp: SQLTimestamp): String = { val julianMicros = rebaseGregorianToJulianMicros(timestamp) cal.setTimeInMillis(Math.floorDiv(julianMicros, MICROS_PER_SECOND) * MILLIS_PER_SECOND) cal.setMicros(Math.floorMod(julianMicros, MICROS_PER_SECOND)) fastDateFormat.format(cal) } + + override def format(ts: Timestamp): String = { + fastDateFormat.format(ts) + } + + override def format(instant: Instant): String = { + format(instantToMicros(instant)) + } } class LegacySimpleTimestampFormatter( @@ -187,6 +206,14 @@ class LegacySimpleTimestampFormatter( override def format(us: Long): String = { sdf.format(toJavaTimestamp(us)) } + + override def format(ts: Timestamp): String = { + sdf.format(ts) + } + + override def format(instant: Instant): String = { + format(instantToMicros(instant)) + } } object LegacyDateFormats extends Enumeration { From b325c21ae2096dd19502423e222ab0cc0b2c9e94 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2020 22:31:27 +0300 Subject: [PATCH 02/12] Re-use new timestamp formatting functions from toHiveSting --- .../scala/org/apache/spark/sql/execution/HiveResult.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 1a84db197044..c290b818d7d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -83,10 +83,8 @@ object HiveResult { case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) case (ld: LocalDate, DateType) => dateFormatter.format(DateTimeUtils.localDateToDays(ld)) - case (t: Timestamp, TimestampType) => - timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) - case (i: Instant, TimestampType) => - timestampFormatter.format(DateTimeUtils.instantToMicros(i)) + case (t: Timestamp, TimestampType) => timestampFormatter.format(t) + case (i: Instant, TimestampType) => timestampFormatter.format(i) case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString From af816e6b6ac90d5a5800ed95913e62454237f1f0 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2020 22:40:22 +0300 Subject: [PATCH 03/12] Support formatting java.util.Date and java.time.LocalDate directly --- .../sql/catalyst/util/DateFormatter.scala | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index 0f79c1a6a751..7d9495509619 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._ sealed trait DateFormatter extends Serializable { def parse(s: String): Int // returns days since epoch + def format(days: Int): String + def format(date: Date): String + def format(localDate: LocalDate): String } class Iso8601DateFormatter( @@ -56,22 +59,32 @@ class Iso8601DateFormatter( } } + override def format(localDate: LocalDate): String = { + localDate.format(formatter) + } + override def format(days: Int): String = { - LocalDate.ofEpochDay(days).format(formatter) + format(LocalDate.ofEpochDay(days)) + } + + override def format(date: Date): String = { + legacyFormatter.format(date) } } trait LegacyDateFormatter extends DateFormatter { def parseToDate(s: String): Date - def formatDate(d: Date): String override def parse(s: String): Int = { fromJavaDate(new java.sql.Date(parseToDate(s).getTime)) } override def format(days: Int): String = { - val date = DateTimeUtils.toJavaDate(days) - formatDate(date) + format(DateTimeUtils.toJavaDate(days)) + } + + override def format(localDate: LocalDate): String = { + format(localDateToDays(localDate)) } } @@ -79,14 +92,14 @@ class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDat @transient private lazy val fdf = FastDateFormat.getInstance(pattern, locale) override def parseToDate(s: String): Date = fdf.parse(s) - override def formatDate(d: Date): String = fdf.format(d) + override def format(d: Date): String = fdf.format(d) } class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter { @transient private lazy val sdf = new SimpleDateFormat(pattern, locale) override def parseToDate(s: String): Date = sdf.parse(s) - override def formatDate(d: Date): String = sdf.format(d) + override def format(d: Date): String = sdf.format(d) } object DateFormatter { From d8b0ed48df7683183f3eff84777d02126bff5879 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 May 2020 22:41:10 +0300 Subject: [PATCH 04/12] Re-use new date formatting functions from toHiveSting --- .../scala/org/apache/spark/sql/execution/HiveResult.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index c290b818d7d7..73484a212c16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -80,9 +80,8 @@ object HiveResult { def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match { case (null, _) => if (nested) "null" else "NULL" case (b, BooleanType) => b.toString - case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) - case (ld: LocalDate, DateType) => - dateFormatter.format(DateTimeUtils.localDateToDays(ld)) + case (d: Date, DateType) => dateFormatter.format(d) + case (ld: LocalDate, DateType) => dateFormatter.format(ld) case (t: Timestamp, TimestampType) => timestampFormatter.format(t) case (i: Instant, TimestampType) => timestampFormatter.format(i) case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) From 6d8c2ae5c33e5a83ec9ac7ce4235e47388cee0fb Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 11:52:59 +0300 Subject: [PATCH 05/12] Fix FractionTimestampFormatter for java.sql.Timestamp --- .../sql/catalyst/util/TimestampFormatter.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 5982e6f9aaa2..0fa05f3c8fbf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -111,10 +111,26 @@ class Iso8601TimestampFormatter( */ class FractionTimestampFormatter(zoneId: ZoneId) extends Iso8601TimestampFormatter( - "", zoneId, TimestampFormatter.defaultLocale, needVarLengthSecondFraction = false) { + TimestampFormatter.defaultPattern, + zoneId, + TimestampFormatter.defaultLocale, + needVarLengthSecondFraction = false) { @transient override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter + + // Converts Timestamp to string according to Hive TimestampWritable convention. + // The code is borrowed from Spark 2.4 DateTimeUtils.timestampToString + override def format(ts: Timestamp): String = { + val timestampString = ts.toString + val formatted = legacyFormatter.format(ts) + + if (timestampString.length > 19 && timestampString.substring(19) != ".0") { + formatted + timestampString.substring(19) + } else { + formatted + } + } } /** From 1b2bc2f98de2df78ba12c4b18270be1fd166d1a6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 12:35:37 +0300 Subject: [PATCH 06/12] Bug fix --- .../org/apache/spark/sql/catalyst/util/TimestampFormatter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 0fa05f3c8fbf..67d98242c44b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -195,7 +195,7 @@ class LegacyFastTimestampFormatter( } override def format(ts: Timestamp): String = { - fastDateFormat.format(ts) + format(fromJavaTimestamp(ts)) } override def format(instant: Instant): String = { From bfae42e8593a0b2dc65e84a420afe3a46a50c988 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 12:35:49 +0300 Subject: [PATCH 07/12] Add tests --- .../sql/util/TimestampFormatterSuite.scala | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 5d27a6b8cce1..355cb4af1b33 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -68,9 +68,17 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - DateTimeUtils.getZoneId(zoneId)) + DateTimeUtils.getZoneId(zoneId), + // Test only FAST_DATE_FORMAT because other legacy formats don't support formatting + // in microsecond precision. + LegacyDateFormats.FAST_DATE_FORMAT, + needVarLengthSecondFraction = false) val timestamp = formatter.format(microsSinceEpoch) assert(timestamp === expectedTimestamp(zoneId)) + assert(formatter.format(microsToInstant(microsSinceEpoch)) === expectedTimestamp(zoneId)) + DateTimeTestUtils.withDefaultTimeZone(getZoneId(zoneId)) { + assert(formatter.format(toJavaTimestamp(microsSinceEpoch)) === expectedTimestamp(zoneId)) + } } } @@ -126,11 +134,18 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("format fraction of second") { val formatter = TimestampFormatter.getFractionFormatter(ZoneOffset.UTC) - assert(formatter.format(0) === "1970-01-01 00:00:00") - assert(formatter.format(1) === "1970-01-01 00:00:00.000001") - assert(formatter.format(1000) === "1970-01-01 00:00:00.001") - assert(formatter.format(900000) === "1970-01-01 00:00:00.9") - assert(formatter.format(1000000) === "1970-01-01 00:00:01") + Seq( + 0 -> "1970-01-01 00:00:00", + 1 -> "1970-01-01 00:00:00.000001", + 1000 -> "1970-01-01 00:00:00.001", + 900000 -> "1970-01-01 00:00:00.9", + 1000000 -> "1970-01-01 00:00:01").foreach { case (micros, tsStr) => + assert(formatter.format(micros) === tsStr) + assert(formatter.format(microsToInstant(micros)) === tsStr) + DateTimeTestUtils.withDefaultTimeZone(ZoneOffset.UTC) { + assert(formatter.format(toJavaTimestamp(micros)) === tsStr) + } + } } test("formatting negative years with default pattern") { From 3914313e0518169749258417cea9e29d7059c8e7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 12:43:18 +0300 Subject: [PATCH 08/12] Test Fraction Formatter --- .../sql/util/TimestampFormatterSuite.scala | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 355cb4af1b33..63e5f460894b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -57,27 +57,31 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("format timestamps using time zones") { val microsSinceEpoch = 1543745472001234L val expectedTimestamp = Map( - "UTC" -> "2018-12-02T10:11:12.001234", - PST.getId -> "2018-12-02T02:11:12.001234", - CET.getId -> "2018-12-02T11:11:12.001234", - "Africa/Dakar" -> "2018-12-02T10:11:12.001234", - "America/Los_Angeles" -> "2018-12-02T02:11:12.001234", - "Antarctica/Vostok" -> "2018-12-02T16:11:12.001234", - "Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234", - "Europe/Amsterdam" -> "2018-12-02T11:11:12.001234") + "UTC" -> "2018-12-02 10:11:12.001234", + PST.getId -> "2018-12-02 02:11:12.001234", + CET.getId -> "2018-12-02 11:11:12.001234", + "Africa/Dakar" -> "2018-12-02 10:11:12.001234", + "America/Los_Angeles" -> "2018-12-02 02:11:12.001234", + "Antarctica/Vostok" -> "2018-12-02 16:11:12.001234", + "Asia/Hong_Kong" -> "2018-12-02 18:11:12.001234", + "Europe/Amsterdam" -> "2018-12-02 11:11:12.001234") DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => - val formatter = TimestampFormatter( - "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - DateTimeUtils.getZoneId(zoneId), - // Test only FAST_DATE_FORMAT because other legacy formats don't support formatting - // in microsecond precision. - LegacyDateFormats.FAST_DATE_FORMAT, - needVarLengthSecondFraction = false) - val timestamp = formatter.format(microsSinceEpoch) - assert(timestamp === expectedTimestamp(zoneId)) - assert(formatter.format(microsToInstant(microsSinceEpoch)) === expectedTimestamp(zoneId)) - DateTimeTestUtils.withDefaultTimeZone(getZoneId(zoneId)) { - assert(formatter.format(toJavaTimestamp(microsSinceEpoch)) === expectedTimestamp(zoneId)) + Seq( + TimestampFormatter( + "yyyy-MM-dd HH:mm:ss.SSSSSS", + getZoneId(zoneId), + // Test only FAST_DATE_FORMAT because other legacy formats don't support formatting + // in microsecond precision. + LegacyDateFormats.FAST_DATE_FORMAT, + needVarLengthSecondFraction = false), + TimestampFormatter.getFractionFormatter(getZoneId(zoneId))).foreach { formatter => + val timestamp = formatter.format(microsSinceEpoch) + assert(timestamp === expectedTimestamp(zoneId)) + assert(formatter.format(microsToInstant(microsSinceEpoch)) === expectedTimestamp(zoneId)) + // Set the default time zone because toJavaTimestamp() depends on it. + DateTimeTestUtils.withDefaultTimeZone(getZoneId(zoneId)) { + assert(formatter.format(toJavaTimestamp(microsSinceEpoch)) === expectedTimestamp(zoneId)) + } } } } From 864efdd79bf3aa6c81504cf1691ef4f88a73b83c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 14:08:06 +0300 Subject: [PATCH 09/12] Add tests to DateFormatterSuite --- .../spark/sql/util/DateFormatterSuite.scala | 16 +++++++++++----- .../spark/sql/util/TimestampFormatterSuite.scala | 5 +---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala index 5e2b6a7c7faf..3954b9b8355c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala @@ -22,7 +22,7 @@ import java.time.{DateTimeException, LocalDate, ZoneOffset} import org.apache.spark.{SparkFunSuite, SparkUpgradeException} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, localDateToDays} +import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy @@ -41,8 +41,11 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) { val formatter = DateFormatter(getZoneId(timeZone)) - val date = formatter.format(17867) - assert(date === "2018-12-02") + val (days, expected) = (17867, "2018-12-02") + val date = formatter.format(days) + assert(date === expected) + assert(formatter.format(daysToLocalDate(days)) === expected) + assert(formatter.format(toJavaDate(days)) === expected) } } } @@ -70,8 +73,9 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateFormatter.defaultLocale, legacyFormat) val days = formatter.parse(date) - val formatted = formatter.format(days) - assert(date === formatted) + assert(date === formatter.format(days)) + assert(date === formatter.format(daysToLocalDate(days))) + assert(date === formatter.format(toJavaDate(days))) } } } @@ -170,7 +174,9 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper { DateFormatter.defaultLocale, legacyFormat) assert(LocalDate.ofEpochDay(formatter.parse("1000-01-01")) === LocalDate.of(1000, 1, 1)) + assert(formatter.format(LocalDate.of(1000, 1, 1)) === "1000-01-01") assert(formatter.format(localDateToDays(LocalDate.of(1000, 1, 1))) === "1000-01-01") + assert(formatter.format(java.sql.Date.valueOf("1000-01-01")) === "1000-01-01") } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 63e5f460894b..1f3e9eaefb85 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -78,10 +78,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers val timestamp = formatter.format(microsSinceEpoch) assert(timestamp === expectedTimestamp(zoneId)) assert(formatter.format(microsToInstant(microsSinceEpoch)) === expectedTimestamp(zoneId)) - // Set the default time zone because toJavaTimestamp() depends on it. - DateTimeTestUtils.withDefaultTimeZone(getZoneId(zoneId)) { - assert(formatter.format(toJavaTimestamp(microsSinceEpoch)) === expectedTimestamp(zoneId)) - } + assert(formatter.format(toJavaTimestamp(microsSinceEpoch)) === expectedTimestamp(zoneId)) } } } From 29d8e5125afd48f007255dff4b3437ff0b0fa15a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 14:43:45 +0300 Subject: [PATCH 10/12] More tests to TimestampFormatterSuite --- .../sql/util/TimestampFormatterSuite.scala | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 1f3e9eaefb85..b467e24b5301 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -134,7 +134,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers } test("format fraction of second") { - val formatter = TimestampFormatter.getFractionFormatter(ZoneOffset.UTC) + val formatter = TimestampFormatter.getFractionFormatter(UTC) Seq( 0 -> "1970-01-01 00:00:00", 1 -> "1970-01-01 00:00:00.000001", @@ -143,18 +143,21 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers 1000000 -> "1970-01-01 00:00:01").foreach { case (micros, tsStr) => assert(formatter.format(micros) === tsStr) assert(formatter.format(microsToInstant(micros)) === tsStr) - DateTimeTestUtils.withDefaultTimeZone(ZoneOffset.UTC) { + DateTimeTestUtils.withDefaultTimeZone(UTC) { assert(formatter.format(toJavaTimestamp(micros)) === tsStr) } } } test("formatting negative years with default pattern") { - val instant = LocalDateTime.of(-99, 1, 1, 0, 0, 0) - .atZone(ZoneOffset.UTC) - .toInstant + val instant = LocalDateTime.of(-99, 1, 1, 0, 0, 0).atZone(UTC).toInstant val micros = DateTimeUtils.instantToMicros(instant) - assert(TimestampFormatter(ZoneOffset.UTC).format(micros) === "-0099-01-01 00:00:00") + assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00") + assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00") + DateTimeTestUtils.withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone + assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros)) + === "0100-01-01 00:00:00 BC") + } } test("special timestamp values") { @@ -282,24 +285,31 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers test("SPARK-31557: rebasing in legacy formatters/parsers") { withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> LegacyBehaviorPolicy.LEGACY.toString) { - LegacyDateFormats.values.foreach { legacyFormat => - DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => - withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId.getId) { - DateTimeTestUtils.withDefaultTimeZone(zoneId) { - withClue(s"${zoneId.getId} legacyFormat = $legacyFormat") { - val formatter = TimestampFormatter( + DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> zoneId.getId) { + DateTimeTestUtils.withDefaultTimeZone(zoneId) { + withClue(s"zoneId = ${zoneId.getId}") { + val formatters = LegacyDateFormats.values.map { legacyFormat => + TimestampFormatter( TimestampFormatter.defaultPattern, zoneId, TimestampFormatter.defaultLocale, legacyFormat, needVarLengthSecondFraction = false) + }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId) + formatters.foreach { formatter => assert(microsToInstant(formatter.parse("1000-01-01 01:02:03")) .atZone(zoneId) .toLocalDateTime === LocalDateTime.of(1000, 1, 1, 1, 2, 3)) + assert(formatter.format( + LocalDateTime.of(1000, 1, 1, 1, 2, 3).atZone(zoneId).toInstant) === + "1000-01-01 01:02:03") assert(formatter.format(instantToMicros( LocalDateTime.of(1000, 1, 1, 1, 2, 3) .atZone(zoneId).toInstant)) === "1000-01-01 01:02:03") + assert(formatter.format(java.sql.Timestamp.valueOf("1000-01-01 01:02:03")) === + "1000-01-01 01:02:03") } } } From 5222a0a1d6b495ddb4d7fd961d821fbabe624956 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 17:06:20 +0300 Subject: [PATCH 11/12] Re-gen postgreSQL/date.sql.out --- .../test/resources/sql-tests/results/postgreSQL/date.sql.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 1d862ba8a41a..151fa1e28d72 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -584,7 +584,7 @@ select make_date(-44, 3, 15) -- !query schema struct -- !query output --0044-03-15 +0045-03-15 -- !query From 31e8ffdcedfc417909f842f469c3e86889a8af42 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 May 2020 17:08:31 +0300 Subject: [PATCH 12/12] Update comment --- .../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 67d98242c44b..1a6e5e4400ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -119,8 +119,9 @@ class FractionTimestampFormatter(zoneId: ZoneId) @transient override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter - // Converts Timestamp to string according to Hive TimestampWritable convention. - // The code is borrowed from Spark 2.4 DateTimeUtils.timestampToString + // The new formatter will omit the trailing 0 in the timestamp string, but the legacy formatter + // can't. Here we borrow the code from Spark 2.4 DateTimeUtils.timestampToString to omit the + // trailing 0 for the legacy formatter as well. override def format(ts: Timestamp): String = { val timestampString = ts.toString val formatted = legacyFormatter.format(ts)