Skip to content

Commit 1fd9a91

Browse files
MaxGekk authored and cloud-fan committed
[SPARK-31005][SQL] Support time zone ids in casting strings to timestamps
### What changes were proposed in this pull request?

In the PR, I propose to change `DateTimeUtils.stringToTimestamp` to support any valid time zone id at the end of the input string. After the changes, the function accepts zone ids in the formats:

- no zone id. In that case, the function uses the local session time zone from the SQL config `spark.sql.session.timeZone`
- -[h]h:[m]m
- +[h]h:[m]m
- Z
- Short zone id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS
- Zone IDs starting with 'UTC+', 'UTC-', 'GMT+', 'GMT-', 'UT+' or 'UT-'. The ID is split in two, with a two or three letter prefix and a suffix starting with the sign. The suffix must be in the formats:
  - +|-h[h]
  - +|-hh[:]mm
  - +|-hh:mm:ss
  - +|-hhmmss
- Region-based zone IDs in the form `{area}/{city}`, such as `Europe/Paris` or `America/New_York`. The default set of region ids is supplied by the IANA Time Zone Database (TZDB).

### Why are the changes needed?

- To use `stringToTimestamp` as a substitution of removed `stringToTime`, see #27710 (comment)
- Improve UX of Spark SQL by allowing flexible formats of zone ids. Currently, Spark accepts only `Z` and zone offsets, which can be inconvenient when a time zone offset is shifted due to daylight saving rules. For instance:

```sql
spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp);
NULL
```

### Does this PR introduce any user-facing change?

Yes. After the changes, casting strings to timestamps allows a time zone id at the end of the strings:

```sql
spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp);
2015-03-18 12:03:17.123456
```

### How was this patch tested?

- Added new test cases to the `string to timestamp` test in `DateTimeUtilsSuite`.
- Ran `CastSuite` and `AnsiCastSuite`.

Closes #27753 from MaxGekk/stringToTimestamp-uni-zoneId.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 807ea41 commit 1fd9a91

File tree

2 files changed

+48
-32
lines changed

2 files changed

+48
-32
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -185,28 +185,28 @@ object DateTimeUtils {
185185
* `yyyy-[m]m`
186186
* `yyyy-[m]m-[d]d`
187187
* `yyyy-[m]m-[d]d `
188-
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
189-
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
190-
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
191-
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
192-
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
193-
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
194-
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
195-
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
196-
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
197-
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
198-
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
199-
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
200-
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]`
201-
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z`
202-
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
203-
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
188+
* `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
189+
* `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
190+
* `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
191+
* `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]`
192+
*
193+
* where `zone_id` should have one of the forms:
194+
* - Z - Zulu time zone UTC+0
195+
* - +|-[h]h:[m]m
196+
* - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS
197+
* - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-,
198+
* and a suffix in the formats:
199+
* - +|-h[h]
200+
* - +|-hh[:]mm
201+
* - +|-hh:mm:ss
202+
* - +|-hhmmss
203+
* - Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
204204
*/
205205
def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = {
206206
if (s == null) {
207207
return None
208208
}
209-
var tz: Option[Byte] = None
209+
var tz: Option[String] = None
210210
val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
211211
var i = 0
212212
var currentSegmentValue = 0
@@ -257,22 +257,21 @@ object DateTimeUtils {
257257
return None
258258
}
259259
} else if (i == 5 || i == 6) {
260-
if (b == 'Z') {
260+
if (b == '-' || b == '+') {
261261
segments(i) = currentSegmentValue
262262
currentSegmentValue = 0
263263
i += 1
264-
tz = Some(43)
265-
} else if (b == '-' || b == '+') {
264+
tz = Some(new String(bytes, j, 1))
265+
} else if (b == '.' && i == 5) {
266266
segments(i) = currentSegmentValue
267267
currentSegmentValue = 0
268268
i += 1
269-
tz = Some(b)
270-
} else if (b == '.' && i == 5) {
269+
} else {
271270
segments(i) = currentSegmentValue
272271
currentSegmentValue = 0
273272
i += 1
274-
} else {
275-
return None
273+
tz = Some(new String(bytes, j, bytes.length - j))
274+
j = bytes.length - 1
276275
}
277276
if (i == 6 && b != '.') {
278277
i += 1
@@ -312,11 +311,11 @@ object DateTimeUtils {
312311
digitsMilli -= 1
313312
}
314313
try {
315-
val zoneId = if (tz.isEmpty) {
316-
timeZoneId
317-
} else {
318-
val sign = if (tz.get.toChar == '-') -1 else 1
319-
ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8))
314+
val zoneId = tz match {
315+
case None => timeZoneId
316+
case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8))
317+
case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8))
318+
case Some(zoneName: String) => getZoneId(zoneName.trim)
320319
}
321320
val nanoseconds = MICROSECONDS.toNanos(segments(6))
322321
val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,24 +183,29 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
183183
var zoneId = getZoneId("GMT-13:53")
184184
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
185185
checkStringToTimestamp("2015-03-18T12:03:17-13:53", expected)
186+
checkStringToTimestamp("2015-03-18T12:03:17GMT-13:53", expected)
186187

187188
zoneId = getZoneId("UTC")
188189
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
189190
checkStringToTimestamp("2015-03-18T12:03:17Z", expected)
190191
checkStringToTimestamp("2015-03-18 12:03:17Z", expected)
192+
checkStringToTimestamp("2015-03-18 12:03:17UTC", expected)
191193

192194
zoneId = getZoneId("GMT-01:00")
193195
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
194196
checkStringToTimestamp("2015-03-18T12:03:17-1:0", expected)
195197
checkStringToTimestamp("2015-03-18T12:03:17-01:00", expected)
198+
checkStringToTimestamp("2015-03-18T12:03:17GMT-01:00", expected)
196199

197200
zoneId = getZoneId("GMT+07:30")
198201
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
199202
checkStringToTimestamp("2015-03-18T12:03:17+07:30", expected)
203+
checkStringToTimestamp("2015-03-18T12:03:17 GMT+07:30", expected)
200204

201205
zoneId = getZoneId("GMT+07:03")
202206
expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId))
203207
checkStringToTimestamp("2015-03-18T12:03:17+07:03", expected)
208+
checkStringToTimestamp("2015-03-18T12:03:17GMT+07:03", expected)
204209

205210
// tests for the string including milliseconds.
206211
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zid))
@@ -213,38 +218,45 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
213218
expected = Option(date(2015, 3, 18, 12, 3, 17, 456000, zid = zoneId))
214219
checkStringToTimestamp("2015-03-18T12:03:17.456Z", expected)
215220
checkStringToTimestamp("2015-03-18 12:03:17.456Z", expected)
221+
checkStringToTimestamp("2015-03-18 12:03:17.456 UTC", expected)
216222

217223
zoneId = getZoneId("GMT-01:00")
218224
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
219225
checkStringToTimestamp("2015-03-18T12:03:17.123-1:0", expected)
220226
checkStringToTimestamp("2015-03-18T12:03:17.123-01:00", expected)
227+
checkStringToTimestamp("2015-03-18T12:03:17.123 GMT-01:00", expected)
221228

222229
zoneId = getZoneId("GMT+07:30")
223230
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
224231
checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected)
232+
checkStringToTimestamp("2015-03-18T12:03:17.123 GMT+07:30", expected)
225233

226234
zoneId = getZoneId("GMT+07:30")
227235
expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId))
228236
checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected)
237+
checkStringToTimestamp("2015-03-18T12:03:17.123GMT+07:30", expected)
229238

230-
zoneId = getZoneId("GMT+07:30")
231239
expected = Option(date(2015, 3, 18, 12, 3, 17, 123121, zid = zoneId))
232240
checkStringToTimestamp("2015-03-18T12:03:17.123121+7:30", expected)
241+
checkStringToTimestamp("2015-03-18T12:03:17.123121 GMT+0730", expected)
233242

234243
zoneId = getZoneId("GMT+07:30")
235244
expected = Option(date(2015, 3, 18, 12, 3, 17, 123120, zid = zoneId))
236245
checkStringToTimestamp("2015-03-18T12:03:17.12312+7:30", expected)
246+
checkStringToTimestamp("2015-03-18T12:03:17.12312 UT+07:30", expected)
237247

238248
expected = Option(time(18, 12, 15, zid = zid))
239249
checkStringToTimestamp("18:12:15", expected)
240250

241251
zoneId = getZoneId("GMT+07:30")
242252
expected = Option(time(18, 12, 15, 123120, zid = zoneId))
243253
checkStringToTimestamp("T18:12:15.12312+7:30", expected)
254+
checkStringToTimestamp("T18:12:15.12312 UTC+07:30", expected)
244255

245256
zoneId = getZoneId("GMT+07:30")
246257
expected = Option(time(18, 12, 15, 123120, zid = zoneId))
247258
checkStringToTimestamp("18:12:15.12312+7:30", expected)
259+
checkStringToTimestamp("18:12:15.12312 GMT+07:30", expected)
248260

249261
expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid))
250262
checkStringToTimestamp("2011-05-06 07:08:09.1000", expected)
@@ -270,8 +282,13 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
270282
// Truncating the fractional seconds
271283
zoneId = getZoneId("GMT+00:00")
272284
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId))
273-
checkStringToTimestamp(
274-
"2015-03-18T12:03:17.123456789+0:00", expected)
285+
checkStringToTimestamp("2015-03-18T12:03:17.123456789+0:00", expected)
286+
checkStringToTimestamp("2015-03-18T12:03:17.123456789 UTC+0", expected)
287+
checkStringToTimestamp("2015-03-18T12:03:17.123456789GMT+00:00", expected)
288+
289+
zoneId = getZoneId("Europe/Moscow")
290+
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId))
291+
checkStringToTimestamp("2015-03-18T12:03:17.123456 Europe/Moscow", expected)
275292
}
276293
}
277294

0 commit comments

Comments
 (0)