diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md
index 5859ad82525f..9dbcdbb5a799 100644
--- a/docs/sql-ref-datetime-pattern.md
+++ b/docs/sql-ref-datetime-pattern.md
@@ -40,7 +40,7 @@ Spark uses pattern letters in the following table for date and timestamp parsing
 |**w**|week-of-week-based-year|number(2)|27|
 |**W**|week-of-month|number(1)|4|
 |**E**|day-of-week|text|Tue; Tuesday|
-|**u**|localized day-of-week|number/text|2; 02; Tue; Tuesday|
+|**u**|day-of-week|number/text|2; 02; Tue; Tuesday|
 |**F**|week-of-month|number(1)|3|
 |**a**|am-pm-of-day|am-pm|PM|
 |**h**|clock-hour-of-am-pm (1-12)|number(2)|12|
@@ -63,7 +63,10 @@ Spark uses pattern letters in the following table for date and timestamp parsing
 
 The count of pattern letters determines the format.
 
-- Text: The text style is determined based on the number of pattern letters used. Less than 4 pattern letters will use the short form. Exactly 4 pattern letters will use the full form. Exactly 5 pattern letters will use the narrow form. 5 or more letters will fail.
+- Text: The text style is determined based on the number of pattern letters used. Less than 4 pattern letters will use the short form. Exactly 4 pattern letters will use the full form. Exactly 5 pattern letters will use the narrow form. More than 5 letters will fail. More details for the text style:
+
+  - Short Form: Short text, typically an abbreviation. For example, day-of-week Monday might output "Mon".
+  - Full Form: Full text, typically the full description. For example, day-of-week Monday might output "Monday".
 
 - Number(n): The n here represents the maximum count of letters this type of datetime pattern can be used. If the count of letters is one, then the value is output using the minimum number of digits and without padding. Otherwise, the count of digits is used as the width of the output field, with the value zero-padded as necessary.
 
@@ -138,9 +141,3 @@ The count of pattern letters determines the format.
 An optional section is started by `[` and ended using `]` (or at the end of the pattern).
 
 - Symbols of 'Y', 'W', 'w', 'E', 'u', 'F', 'q' and 'Q' can only be used for datetime formatting, e.g. `date_format`. They are not allowed used for datetime parsing, e.g. `to_timestamp`.
-
-More details for the text style:
-
-- Short Form: Short text, typically an abbreviation. For example, day-of-week Monday might output "Mon".
-
-- Full Form: Full text, typically the full description. For example, day-of-week Monday might output "Monday".
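To make the width rules above concrete, here is a standalone `java.time` sketch (not Spark code; the object name is mine). Letter count selects short, full, or narrow text form, and a sixth letter throws at pattern-compile time:

```scala
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.util.Locale

object TextStyleDemo extends App {
  val monday = LocalDate.of(2020, 1, 6) // a Monday
  // <4 letters: short form; exactly 4: full form; exactly 5: narrow form
  Seq("EEE" -> "short", "EEEE" -> "full", "EEEEE" -> "narrow").foreach {
    case (pattern, style) =>
      val out = monday.format(DateTimeFormatter.ofPattern(pattern, Locale.US))
      println(s"$pattern ($style) -> $out") // Mon, Monday, M
  }
  // DateTimeFormatter.ofPattern("EEEEEE") throws IllegalArgumentException
}
```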
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 6d225ad9b764..4e08860e3f75 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -117,7 +117,9 @@ class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyD
 object DateFormatter {
   import LegacyDateFormats._
 
-  val defaultLocale: Locale = Locale.US
+  import DateTimeFormatterHelper._
+
+  presetSundayStartToMondayStart()
 
   val defaultPattern: String = "yyyy-MM-dd"
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index eeb56aa9821c..3dbb7304cfa4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -17,11 +17,13 @@
 
 package org.apache.spark.sql.catalyst.util
 
+import java.lang.reflect.{Field, Modifier}
 import java.time._
 import java.time.chrono.IsoChronology
 import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
-import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
+import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries, WeekFields}
 import java.util.Locale
+import java.util.concurrent.ConcurrentHashMap
 
 import com.google.common.cache.CacheBuilder
@@ -155,7 +157,22 @@ trait DateTimeFormatterHelper {
   }
 }
 
-private object DateTimeFormatterHelper {
+private[spark] object DateTimeFormatterHelper {
+  val defaultLocale = Locale.US
+
+  def presetSundayStartToMondayStart(): Unit = {
+    val CACHE: Field = classOf[WeekFields].getDeclaredField("CACHE")
+    CACHE.setAccessible(true)
+    val modifiers: Field = CACHE.getClass.getDeclaredField("modifiers")
+    modifiers.setAccessible(true)
+    modifiers.setInt(CACHE, CACHE.getModifiers & ~Modifier.FINAL)
+    val newCache = new ConcurrentHashMap[String, WeekFields]()
+    // Preset the Sunday-start entry to the ISO-based Monday-start instance, so
+    // that retrieving the first day of week for Locale.US yields Monday
+    newCache.put(DayOfWeek.SUNDAY.toString + 1, WeekFields.ISO)
+    CACHE.set(null, newCache)
+  }
+
   val cache = CacheBuilder.newBuilder()
     .maximumSize(128)
     .build[(String, Locale, Boolean), DateTimeFormatter]()
@@ -222,7 +239,7 @@ private object DateTimeFormatterHelper {
       .appendValue(ChronoField.MINUTE_OF_HOUR, 2).appendLiteral(':')
       .appendValue(ChronoField.SECOND_OF_MINUTE, 2)
       .appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
-    toFormatter(builder, TimestampFormatter.defaultLocale)
+    toFormatter(builder, defaultLocale)
   }
 
   private final val bugInStandAloneForm = {
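For reviewers: `presetSundayStartToMondayStart()` reflectively un-finalizes the private `WeekFields.CACHE` field and swaps in a map where the key `"SUNDAY1"` (what `WeekFields.of(Locale.US)` looks up: first day Sunday, minimal days 1) resolves to `WeekFields.ISO`. A minimal sketch of the observable effect, assuming JDK 8, where the `modifiers`-field reflection trick still works (the object name is mine):

```scala
import java.time.temporal.WeekFields
import java.util.Locale

import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper

object WeekFieldsPresetDemo extends App {
  DateTimeFormatterHelper.presetSundayStartToMondayStart()
  // WeekFields.of(Locale.US) computes key "SUNDAY1", which now maps to ISO.
  val us = WeekFields.of(Locale.US)
  println(us.getFirstDayOfWeek) // MONDAY (would be SUNDAY without the preset)
  assert(us eq WeekFields.ISO)  // the cached instance is literally WeekFields.ISO
}
```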
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 97ecc430af4a..788fe27f2a13 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -29,6 +29,7 @@ import java.util.concurrent.TimeUnit.SECONDS
 import org.apache.commons.lang3.time.FastDateFormat
 
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
+import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.{LegacyDateFormat, LENIENT_SIMPLE_DATE_FORMAT}
 import org.apache.spark.sql.catalyst.util.RebaseDateTime._
@@ -120,7 +121,7 @@ class FractionTimestampFormatter(zoneId: ZoneId)
   extends Iso8601TimestampFormatter(
     TimestampFormatter.defaultPattern,
     zoneId,
-    TimestampFormatter.defaultLocale,
+    defaultLocale,
     LegacyDateFormats.FAST_DATE_FORMAT,
     needVarLengthSecondFraction = false) {
 
@@ -278,7 +279,7 @@ object LegacyDateFormats extends Enumeration {
 object TimestampFormatter {
   import LegacyDateFormats._
 
-  val defaultLocale: Locale = Locale.US
+  presetSundayStartToMondayStart()
 
 def defaultPattern(): String = s"${DateFormatter.defaultPattern} HH:mm:ss"
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index 22a1396d5efd..b0ac291ad98c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -21,7 +21,7 @@ import java.time.{DateTimeException, LocalDate}
 
 import org.apache.spark.{SparkFunSuite, SparkUpgradeException}
 import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.{DateFormatter, LegacyDateFormats}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatterHelper, LegacyDateFormats}
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.internal.SQLConf
@@ -71,7 +71,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
           val formatter = DateFormatter(
             DateFormatter.defaultPattern,
             getZoneId(timeZone),
-            DateFormatter.defaultLocale,
+            DateTimeFormatterHelper.defaultLocale,
             legacyFormat,
             isParsing = false)
           val days = formatter.parse(date)
@@ -106,7 +106,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
           val formatter = DateFormatter(
             DateFormatter.defaultPattern,
             getZoneId(timeZone),
-            DateFormatter.defaultLocale,
+            DateTimeFormatterHelper.defaultLocale,
             legacyFormat,
             isParsing = false)
           val date = formatter.format(days)
@@ -174,7 +174,7 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
         val formatter = DateFormatter(
           DateFormatter.defaultPattern,
           getZoneId(timeZone),
-          DateFormatter.defaultLocale,
+          DateTimeFormatterHelper.defaultLocale,
           legacyFormat,
           isParsing = false)
         assert(LocalDate.ofEpochDay(formatter.parse("1000-01-01")) === LocalDate.of(1000, 1, 1))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index 1530ac4e24da..8f6af56c0488 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -25,6 +25,7 @@ import org.scalatest.Matchers
 import org.apache.spark.{SparkFunSuite, SparkUpgradeException}
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.util.{LegacyDateFormats, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.internal.SQLConf
@@ -290,7 +291,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
         TimestampFormatter(
           TimestampFormatter.defaultPattern,
           zoneId,
-          TimestampFormatter.defaultLocale,
+          defaultLocale,
           legacyFormat,
           isParsing = false)
       }.toSeq :+ TimestampFormatter.getFractionFormatter(zoneId)
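With `defaultLocale` now living on the helper object, call sites build formatters the way the updated suites do. A usage sketch mirroring those call sites (assumes this patch is applied; everything else here, including the object name and the `format(0L)` microsecond call, follows the APIs visible in the diff above):

```scala
import java.time.ZoneId

import org.apache.spark.sql.catalyst.util.{DateTimeFormatterHelper, LegacyDateFormats, TimestampFormatter}

object SharedLocaleUsage extends App {
  val formatter = TimestampFormatter(
    TimestampFormatter.defaultPattern,
    ZoneId.of("UTC"),
    DateTimeFormatterHelper.defaultLocale, // was TimestampFormatter.defaultLocale
    LegacyDateFormats.FAST_DATE_FORMAT,
    isParsing = false)
  println(formatter.format(0L)) // 1970-01-01 00:00:00
}
```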
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
index 9f99bf501156..f17de0950a06 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp}
 import java.time.{Instant, LocalDate, ZoneOffset}
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeFormatterHelper, DateTimeUtils, LegacyDateFormats, TimestampFormatter}
 import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
 import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec}
 import org.apache.spark.sql.internal.SQLConf
@@ -84,7 +84,7 @@ object HiveResult {
     format = DateFormatter.defaultPattern,
     // We can set any time zone id. UTC was taken for simplicity.
     zoneId = ZoneOffset.UTC,
-    locale = DateFormatter.defaultLocale,
+    locale = DateTimeFormatterHelper.defaultLocale,
     // Use `FastDateFormat` as the legacy formatter because it is thread-safe.
     legacyFormat = LegacyDateFormats.FAST_DATE_FORMAT,
     isParsing = false)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index 9bd936f6f441..908b0066b4d4 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -164,3 +164,8 @@ select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy
 select from_unixtime(1, 'yyyyyyyyyyy-MM-dd');
 select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss');
 select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd');
+
+select date_format(date '2019-12-29', 'YYYY-ww-uu');
+select date_format('2020-01-01', 'YYYY-MM-dd uu');
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu');
+SELECT to_csv(named_struct('time', to_timestamp('2019-12-29', 'yyyy-MM-dd')), map('timestampFormat', 'YYYY-ww-uu', 'locale', 'en-US'));
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index ca04b008d653..6bc1de836336 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 123
 
 
 -- !query
@@ -1025,3 +1025,35 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format(date '2019-12-29', 'YYYY-ww-uu')
+-- !query schema
+struct
+-- !query output
+2019-52-07
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 Wednesday
+
+
+-- !query
+SELECT to_csv(named_struct('time', to_timestamp('2019-12-29', 'yyyy-MM-dd')), map('timestampFormat', 'YYYY-ww-uu', 'locale', 'en-US'))
+-- !query schema
+struct
+-- !query output
+2019-52-07
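The new expected outputs follow directly from ISO week fields: 2019-12-29 is a Sunday, and 2020-01-01 is a Wednesday. A standalone `java.time` sketch (no Spark, cache preset not applied, object name mine) reproducing the week and day numbers; the legacy `07` day number comes from `SimpleDateFormat` semantics and is sketched after the legacy results below:

```scala
import java.time.{DayOfWeek, LocalDate}
import java.time.temporal.WeekFields

object WeekFieldsExpectations extends App {
  val sunday = LocalDate.of(2019, 12, 29)
  val iso = WeekFields.ISO
  // Note: if the preset had run in this JVM, WeekFields.of(SUNDAY, 1) would
  // itself return ISO; standalone, it is the pre-preset Locale.US definition.
  val us = WeekFields.of(DayOfWeek.SUNDAY, 1)
  // ISO: week-based-year 2019, week 52, day 7 -> the new "2019-52-07"
  println(Seq(iso.weekBasedYear, iso.weekOfWeekBasedYear, iso.dayOfWeek)
    .map(sunday.get).mkString("-"))
  // Sunday-start: week 1 of week-year 2020 -> the week part of the legacy result
  println(Seq(us.weekBasedYear, us.weekOfWeekBasedYear).map(sunday.get).mkString("-"))
  // ISO day-of-week number of 2020-01-01 is 3 -> the "03" above
  println(LocalDate.of(2020, 1, 1).get(iso.dayOfWeek))
}
```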
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index fe932d3a706a..313f4da30bd1 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 123
 
 
 -- !query
@@ -980,3 +980,35 @@ select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd')
 struct
 -- !query output
 00000002018-11-17
+
+
+-- !query
+select date_format(date '2019-12-29', 'YYYY-ww-uu')
+-- !query schema
+struct
+-- !query output
+2020-01-07
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 0003
+
+
+-- !query
+SELECT to_csv(named_struct('time', to_timestamp('2019-12-29', 'yyyy-MM-dd')), map('timestampFormat', 'YYYY-ww-uu', 'locale', 'en-US'))
+-- !query schema
+struct
+-- !query output
+2020-01-07
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 06a41da2671e..8ec1e5153c4a 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 123
 
 
 -- !query
@@ -997,3 +997,35 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format(date '2019-12-29', 'YYYY-ww-uu')
+-- !query schema
+struct
+-- !query output
+2019-52-07
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 Wednesday
+
+
+-- !query
+SELECT to_csv(named_struct('time', to_timestamp('2019-12-29', 'yyyy-MM-dd')), map('timestampFormat', 'YYYY-ww-uu', 'locale', 'en-US'))
+-- !query schema
+struct
+-- !query output
+2019-52-07
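And the legacy expectations come straight from `SimpleDateFormat` semantics, sketched here standalone (Locale.US; object name mine): `u` is a day-of-week number (1 = Monday .. 7 = Sunday) zero-padded to the pattern width, and US week numbering (Sunday start, minimal days 1) puts 2019-12-29 in week 1 of week-year 2020.

```scala
import java.text.SimpleDateFormat
import java.util.Locale

object LegacyFormatDemo extends App {
  val parser = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
  // 'YYYY' = week year, 'ww' = week in year, 'uu' = day number of week
  val weekFmt = new SimpleDateFormat("YYYY-ww-uu", Locale.US)
  println(weekFmt.format(parser.parse("2019-12-29"))) // 2020-01-07 (a Sunday)
  // Any width of 'u' stays numeric, hence "0003" rather than "Wednesday"
  val dayFmt = new SimpleDateFormat("uuuu", Locale.US)
  println(dayFmt.format(parser.parse("2020-01-01"))) // 0003
}
```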