From 40b4e6b68d1a1a2926ba827fd9961cc0b778cf91 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 1 Mar 2020 21:34:15 +0300 Subject: [PATCH 1/4] Add tests --- .../catalyst/util/DateTimeUtilsSuite.scala | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 1465b066434b..ca051bcaa444 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -183,16 +183,19 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { var zoneId = getZoneId("GMT-13:53") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17-13:53", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT-13:53", expected) zoneId = getZoneId("UTC") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17Z", expected) checkStringToTimestamp("2015-03-18 12:03:17Z", expected) + checkStringToTimestamp("2015-03-18 12:03:17UTC", expected) zoneId = getZoneId("GMT-01:00") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17-1:0", expected) checkStringToTimestamp("2015-03-18T12:03:17-01:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT-01:00", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) @@ -201,6 +204,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+07:03") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17+07:03", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT+07:03", expected) // tests for 
the string including milliseconds. expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zid)) @@ -213,27 +217,22 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { expected = Option(date(2015, 3, 18, 12, 3, 17, 456000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.456Z", expected) checkStringToTimestamp("2015-03-18 12:03:17.456Z", expected) + checkStringToTimestamp("2015-03-18 12:03:17.456UTC", expected) zoneId = getZoneId("GMT-01:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123-1:0", expected) checkStringToTimestamp("2015-03-18T12:03:17.123-01:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123GMT-01:00", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123GMT+07:30", expected) - zoneId = getZoneId("GMT+07:30") - expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) - checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected) - - zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123121, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123121+7:30", expected) - - zoneId = getZoneId("GMT+07:30") - expected = Option(date(2015, 3, 18, 12, 3, 17, 123120, zid = zoneId)) - checkStringToTimestamp("2015-03-18T12:03:17.12312+7:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123121GMT+07:30", expected) expected = Option(time(18, 12, 15, zid = zid)) checkStringToTimestamp("18:12:15", expected) @@ -241,10 +240,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+07:30") expected = Option(time(18, 12, 15, 123120, zid = zoneId)) checkStringToTimestamp("T18:12:15.12312+7:30", expected) - - zoneId = getZoneId("GMT+07:30") - expected = 
Option(time(18, 12, 15, 123120, zid = zoneId)) - checkStringToTimestamp("18:12:15.12312+7:30", expected) + checkStringToTimestamp("T18:12:15.12312GMT+07:30", expected) expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid)) checkStringToTimestamp("2011-05-06 07:08:09.1000", expected) @@ -270,8 +266,12 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { // Truncating the fractional seconds zoneId = getZoneId("GMT+00:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId)) - checkStringToTimestamp( - "2015-03-18T12:03:17.123456789+0:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789+0:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789GMT+00:00", expected) + + zoneId = getZoneId("Europe/Moscow") + expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId)) + checkStringToTimestamp("2015-03-18T12:03:17.123456 Europe/Moscow", expected) } } From f10733147152e0b2ba7e5a298386e15b84eda421 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sun, 1 Mar 2020 21:34:51 +0300 Subject: [PATCH 2/4] Support time zone ids in stringToTimestamp --- .../sql/catalyst/util/DateTimeUtils.scala | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 731aa3502753..40544c17ee63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -191,7 +191,7 @@ object DateTimeUtils { if (s == null) { return None } - var tz: Option[Byte] = None + var tz: Option[String] = None val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0) var i = 0 var currentSegmentValue = 0 @@ -242,22 +242,21 @@ object DateTimeUtils { return None } } else if (i == 5 || i == 6) { - if (b 
== 'Z') { + if (b == '-' || b == '+') { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - tz = Some(43) - } else if (b == '-' || b == '+') { + tz = Some(new String(bytes, j, 1)) + } else if (b == '.' && i == 5) { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - tz = Some(b) - } else if (b == '.' && i == 5) { + } else { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - } else { - return None + tz = Some(new String(bytes, j, bytes.length - j)) + j = bytes.length - 1 } if (i == 6 && b != '.') { i += 1 @@ -297,11 +296,11 @@ object DateTimeUtils { digitsMilli -= 1 } try { - val zoneId = if (tz.isEmpty) { - timeZoneId - } else { - val sign = if (tz.get.toChar == '-') -1 else 1 - ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8)) + val zoneId = tz match { + case None => timeZoneId + case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8)) + case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8)) + case Some(zoneName: String) => getZoneId(zoneName.trim) } val nanoseconds = MICROSECONDS.toNanos(segments(6)) val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt) From bdfe5f3f77125d864a19371ecfbff53f06c3e568 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 3 Mar 2020 14:36:31 +0300 Subject: [PATCH 3/4] Update the comment for stringToTimestamp --- .../sql/catalyst/util/DateTimeUtils.scala | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 40544c17ee63..3dad9d78620b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -170,22 +170,22 @@ object DateTimeUtils { * `yyyy-[m]m` * `yyyy-[m]m-[d]d` * `yyyy-[m]m-[d]d 
` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * + * where `zone_id` should have one of the forms: + * - Z - Zulu time zone UTC+0 + * - +|-[h]h:[m]m + * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS + * - An id with one of the prefixes UTC, GMT or UT, + * followed by a signed offset suffix in one of the formats: + * - +|-h[h] + * - +|-hh[:]mm + * - +|-hh:mm:ss + * - +|-hhmmss + * - Region-based zone IDs in the form `area/city`, such as `Europe/Paris` */ def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = { if (s == null) { From 00a07ab500b63cb13edcd8bf71ad709d008e2d5b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 4 Mar 2020 17:12:52 +0300 Subject: [PATCH 4/4] Add more tests --- .../catalyst/util/DateTimeUtilsSuite.scala | 25 ++++++++++++++++--- 1 file changed, 21
insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index ca051bcaa444..6dde3d2fd94d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -200,6 +200,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17 GMT+07:30", expected) zoneId = getZoneId("GMT+07:03") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) @@ -217,13 +218,18 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { expected = Option(date(2015, 3, 18, 12, 3, 17, 456000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.456Z", expected) checkStringToTimestamp("2015-03-18 12:03:17.456Z", expected) - checkStringToTimestamp("2015-03-18 12:03:17.456UTC", expected) + checkStringToTimestamp("2015-03-18 12:03:17.456 UTC", expected) zoneId = getZoneId("GMT-01:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123-1:0", expected) checkStringToTimestamp("2015-03-18T12:03:17.123-01:00", expected) - checkStringToTimestamp("2015-03-18T12:03:17.123GMT-01:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123 GMT-01:00", expected) + + zoneId = getZoneId("GMT+07:30") + expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) + checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123 GMT+07:30", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, 
zid = zoneId)) @@ -232,7 +238,12 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { expected = Option(date(2015, 3, 18, 12, 3, 17, 123121, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123121+7:30", expected) - checkStringToTimestamp("2015-03-18T12:03:17.123121GMT+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123121 GMT+0730", expected) + + zoneId = getZoneId("GMT+07:30") + expected = Option(date(2015, 3, 18, 12, 3, 17, 123120, zid = zoneId)) + checkStringToTimestamp("2015-03-18T12:03:17.12312+7:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.12312 UT+07:30", expected) expected = Option(time(18, 12, 15, zid = zid)) checkStringToTimestamp("18:12:15", expected) @@ -240,7 +251,12 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+07:30") expected = Option(time(18, 12, 15, 123120, zid = zoneId)) checkStringToTimestamp("T18:12:15.12312+7:30", expected) - checkStringToTimestamp("T18:12:15.12312GMT+07:30", expected) + checkStringToTimestamp("T18:12:15.12312 UTC+07:30", expected) + + zoneId = getZoneId("GMT+07:30") + expected = Option(time(18, 12, 15, 123120, zid = zoneId)) + checkStringToTimestamp("18:12:15.12312+7:30", expected) + checkStringToTimestamp("18:12:15.12312 GMT+07:30", expected) expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid)) checkStringToTimestamp("2011-05-06 07:08:09.1000", expected) @@ -267,6 +283,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+00:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123456789+0:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789 UTC+0", expected) checkStringToTimestamp("2015-03-18T12:03:17.123456789GMT+00:00", expected) zoneId = getZoneId("Europe/Moscow")