diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index adc69ab1c652..9535a369cb2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.catalyst.util import java.time.{Instant, ZoneId} import java.util.Locale +import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToDays + sealed trait DateFormatter extends Serializable { def parse(s: String): Int // returns days since epoch def format(days: Int): String @@ -38,11 +40,7 @@ class Iso8601DateFormatter( toInstantWithZoneId(temporalAccessor, UTC) } - override def parse(s: String): Int = { - val seconds = toInstant(s).getEpochSecond - val days = Math.floorDiv(seconds, DateTimeUtils.SECONDS_PER_DAY) - days.toInt - } + override def parse(s: String): Int = instantToDays(toInstant(s)) override def format(days: Int): String = { val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index da8899a02f31..867647921349 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -18,13 +18,12 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} -import java.text.{DateFormat, SimpleDateFormat} +import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZonedDateTime} +import java.time.Year.isLeap +import java.time.temporal.IsoFields import java.util.{Calendar, Locale, TimeZone} -import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import java.util.function.{Function => JFunction} -import javax.xml.bind.DatatypeConverter - -import scala.annotation.tailrec import org.apache.spark.unsafe.types.UTF8String @@ -53,30 +52,12 @@ object DateTimeUtils { final val NANOS_PER_MICROS = 1000L final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L - // number of days in 400 years by Gregorian calendar - final val daysIn400Years: Int = 146097 - - // In the Julian calendar every year that is exactly divisible by 4 is a leap year without any - // exception. But in the Gregorian calendar every year that is exactly divisible by four - // is a leap year, except for years that are exactly divisible by 100, but these centurial years - // are leap years if they are exactly divisible by 400. - // So there are 3 extra days in the Julian calendar within a 400 years cycle compared to the - // Gregorian calendar. - final val extraLeapDaysIn400YearsJulian = 3 - - // number of days in 400 years by Julian calendar - final val daysIn400YearsInJulian: Int = daysIn400Years + extraLeapDaysIn400YearsJulian - // number of days between 1.1.1970 and 1.1.2001 final val to2001 = -11323 // this is year -17999, calculation: 50 * daysIn400Year final val YearZero = -17999 final val toYearZero = to2001 + 7304850 - - // days to year -17999 in Julian calendar - final val toYearZeroInJulian = toYearZero + 49 * extraLeapDaysIn400YearsJulian - final val TimeZoneGMT = TimeZone.getTimeZone("GMT") final val TimeZoneUTC = TimeZone.getTimeZone("UTC") final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12) @@ -85,13 +66,6 @@ object DateTimeUtils { def defaultTimeZone(): TimeZone = TimeZone.getDefault() - // Reuse the Calendar object in each thread as it is expensive to create in each method call. - private val threadLocalGmtCalendar = new ThreadLocal[Calendar] { - override protected def initialValue: Calendar = { - Calendar.getInstance(TimeZoneGMT) - } - } - private val computedTimeZones = new ConcurrentHashMap[String, TimeZone] private val computeTimeZone = new JFunction[String, TimeZone] { override def apply(timeZoneId: String): TimeZone = TimeZone.getTimeZone(timeZoneId) @@ -136,27 +110,6 @@ object DateTimeUtils { } } - @tailrec - def stringToTime(s: String): java.util.Date = { - val indexOfGMT = s.indexOf("GMT") - if (indexOfGMT != -1) { - // ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00) - val s0 = s.substring(0, indexOfGMT) - val s1 = s.substring(indexOfGMT + 3) - // Mapped to 2000-01-01T00:00+01:00 - stringToTime(s0 + s1) - } else if (!s.contains('T')) { - // JDBC escape string - if (s.contains(' ')) { - Timestamp.valueOf(s) - } else { - Date.valueOf(s) - } - } else { - DatatypeConverter.parseDateTime(s).getTime() - } - } - /** * Returns the number of days since epoch from java.sql.Date. */ @@ -388,23 +341,35 @@ object DateTimeUtils { return None } - val c = if (tz.isEmpty) { - Calendar.getInstance(timeZone) + val zoneId = if (tz.isEmpty) { + timeZone.toZoneId } else { - Calendar.getInstance( - getTimeZone(f"GMT${tz.get.toChar}${segments(7)}%02d:${segments(8)}%02d")) + getTimeZone(f"GMT${tz.get.toChar}${segments(7)}%02d:${segments(8)}%02d").toZoneId } - c.set(Calendar.MILLISECOND, 0) - - if (justTime) { - c.set(Calendar.HOUR_OF_DAY, segments(3)) - c.set(Calendar.MINUTE, segments(4)) - c.set(Calendar.SECOND, segments(5)) + val nanoseconds = TimeUnit.MICROSECONDS.toNanos(segments(6)) + val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt) + val localDate = if (justTime) { + LocalDate.now(zoneId) } else { - c.set(segments(0), segments(1) - 1, segments(2), segments(3), segments(4), segments(5)) + LocalDate.of(segments(0), segments(1), segments(2)) } + val localDateTime = LocalDateTime.of(localDate, localTime) + val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId) + val instant = Instant.from(zonedDateTime) + + Some(instantToMicros(instant)) + } + + def instantToMicros(instant: Instant): Long = { + val sec = Math.multiplyExact(instant.getEpochSecond, MICROS_PER_SECOND) + val result = Math.addExact(sec, instant.getNano / NANOS_PER_MICROS) + result + } - Some(c.getTimeInMillis * 1000 + segments(6)) + def instantToDays(instant: Instant): Int = { + val seconds = instant.getEpochSecond + val days = Math.floorDiv(seconds, SECONDS_PER_DAY) + days.toInt } /** @@ -457,11 +422,9 @@ object DateTimeUtils { return None } - val c = threadLocalGmtCalendar.get() - c.clear() - c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0) - c.set(Calendar.MILLISECOND, 0) - Some((c.getTimeInMillis / MILLIS_PER_DAY).toInt) + val localDate = LocalDate.of(segments(0), segments(1), segments(2)) + val instant = localDate.atStartOfDay(TimeZoneUTC.toZoneId).toInstant + Some(instantToDays(instant)) } /** @@ -472,9 +435,9 @@ object DateTimeUtils { return true } if (month == 2) { - if (isLeapYear(year) && day > 29) { + if (isLeap(year) && day > 29) { return true - } else if (!isLeapYear(year) && day > 28) { + } else if (!isLeap(year) && day > 28) { return true } } else if (!MonthOf31Days.contains(month) && day > 30) { @@ -544,78 +507,12 @@ object DateTimeUtils { ((localTimestamp(microsec, timeZone) / MICROS_PER_SECOND) % 60).toInt } - private[this] def isLeapYear(year: Int): Boolean = { - (year % 4) == 0 && ((year % 100) != 0 || (year % 400) == 0) - } - - /** - * Return the number of days since the start of 400 year period. - * The second year of a 400 year period (year 1) starts on day 365. - */ - private[this] def yearBoundary(year: Int, isGregorian: Boolean): Int = { - if (isGregorian) { - year * 365 + ((year / 4) - (year / 100) + (year / 400)) - } else { - year * 365 + (year / 4) - } - } - - /** - * Calculates the number of years for the given number of days. This depends - * on a 400 year period. - * @param days days since the beginning of the 400 year period - * @param isGregorian indicates whether leap years should be calculated according to Gregorian - * (or Julian) calendar - * @return (number of year, days in year) - */ - private[this] def numYears(days: Int, isGregorian: Boolean): (Int, Int) = { - val year = days / 365 - val boundary = yearBoundary(year, isGregorian) - if (days > boundary) { - (year, days - boundary) - } else { - (year - 1, days - yearBoundary(year - 1, isGregorian)) - } - } - - /** - * Calculates the year and the number of the day in the year for the given - * number of days. The given days is the number of days since 1.1.1970. - * - * The calculation uses the fact that the period 1.1.2001 until 31.12.2400 is - * equals to the period 1.1.1601 until 31.12.2000. - */ - private[this] def getYearAndDayInYear(daysSince1970: SQLDate): (Int, Int) = { - // Since Julian calendar was replaced with the Gregorian calendar, - // the 10 days after Oct. 4 were skipped. - // (1582-10-04) -141428 days since 1970-01-01 - if (daysSince1970 <= -141428) { - getYearAndDayInYear(daysSince1970 - 10, toYearZeroInJulian, daysIn400YearsInJulian, false) - } else { - getYearAndDayInYear(daysSince1970, toYearZero, daysIn400Years, true) - } - } - - private def getYearAndDayInYear( - daysSince1970: SQLDate, - toYearZero: SQLDate, - daysIn400Years: SQLDate, - isGregorian: Boolean): (Int, Int) = { - // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) - val daysNormalized = daysSince1970 + toYearZero - val numOfQuarterCenturies = daysNormalized / daysIn400Years - val daysInThis400 = daysNormalized % daysIn400Years + 1 - val (years, dayInYear) = numYears(daysInThis400, isGregorian) - val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years - (year, dayInYear) - } - /** * Returns the 'day in year' value for the given date. The date is expressed in days * since 1.1.1970. */ def getDayInYear(date: SQLDate): Int = { - getYearAndDayInYear(date)._2 + LocalDate.ofEpochDay(date).getDayOfYear } /** @@ -623,7 +520,7 @@ object DateTimeUtils { * since 1.1.1970. */ def getYear(date: SQLDate): Int = { - getYearAndDayInYear(date)._1 + LocalDate.ofEpochDay(date).getYear } /** @@ -631,19 +528,7 @@ object DateTimeUtils { * since 1.1.1970. */ def getQuarter(date: SQLDate): Int = { - var (year, dayInYear) = getYearAndDayInYear(date) - if (isLeapYear(year)) { - dayInYear = dayInYear - 1 - } - if (dayInYear <= 90) { - 1 - } else if (dayInYear <= 181) { - 2 - } else if (dayInYear <= 273) { - 3 - } else { - 4 - } + LocalDate.ofEpochDay(date).get(IsoFields.QUARTER_OF_YEAR) } /** @@ -651,43 +536,8 @@ object DateTimeUtils { * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). */ def splitDate(date: SQLDate): (Int, Int, Int, Int) = { - var (year, dayInYear) = getYearAndDayInYear(date) - val isLeap = isLeapYear(year) - if (isLeap && dayInYear == 60) { - (year, 2, 29, 0) - } else { - if (isLeap && dayInYear > 60) dayInYear -= 1 - - if (dayInYear <= 181) { - if (dayInYear <= 31) { - (year, 1, dayInYear, 31 - dayInYear) - } else if (dayInYear <= 59) { - (year, 2, dayInYear - 31, if (isLeap) 60 - dayInYear else 59 - dayInYear) - } else if (dayInYear <= 90) { - (year, 3, dayInYear - 59, 90 - dayInYear) - } else if (dayInYear <= 120) { - (year, 4, dayInYear - 90, 120 - dayInYear) - } else if (dayInYear <= 151) { - (year, 5, dayInYear - 120, 151 - dayInYear) - } else { - (year, 6, dayInYear - 151, 181 - dayInYear) - } - } else { - if (dayInYear <= 212) { - (year, 7, dayInYear - 181, 212 - dayInYear) - } else if (dayInYear <= 243) { - (year, 8, dayInYear - 212, 243 - dayInYear) - } else if (dayInYear <= 273) { - (year, 9, dayInYear - 243, 273 - dayInYear) - } else if (dayInYear <= 304) { - (year, 10, dayInYear - 273, 304 - dayInYear) - } else if (dayInYear <= 334) { - (year, 11, dayInYear - 304, 334 - dayInYear) - } else { - (year, 12, dayInYear - 334, 365 - dayInYear) - } - } - } + val ld = LocalDate.ofEpochDay(date) + (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) } /** @@ -695,40 +545,7 @@ object DateTimeUtils { * since 1.1.1970. January is month 1. */ def getMonth(date: SQLDate): Int = { - var (year, dayInYear) = getYearAndDayInYear(date) - if (isLeapYear(year)) { - if (dayInYear == 60) { - return 2 - } else if (dayInYear > 60) { - dayInYear = dayInYear - 1 - } - } - - if (dayInYear <= 31) { - 1 - } else if (dayInYear <= 59) { - 2 - } else if (dayInYear <= 90) { - 3 - } else if (dayInYear <= 120) { - 4 - } else if (dayInYear <= 151) { - 5 - } else if (dayInYear <= 181) { - 6 - } else if (dayInYear <= 212) { - 7 - } else if (dayInYear <= 243) { - 8 - } else if (dayInYear <= 273) { - 9 - } else if (dayInYear <= 304) { - 10 - } else if (dayInYear <= 334) { - 11 - } else { - 12 - } + LocalDate.ofEpochDay(date).getMonthValue } /** @@ -736,40 +553,7 @@ object DateTimeUtils { * since 1.1.1970. */ def getDayOfMonth(date: SQLDate): Int = { - var (year, dayInYear) = getYearAndDayInYear(date) - if (isLeapYear(year)) { - if (dayInYear == 60) { - return 29 - } else if (dayInYear > 60) { - dayInYear = dayInYear - 1 - } - } - - if (dayInYear <= 31) { - dayInYear - } else if (dayInYear <= 59) { - dayInYear - 31 - } else if (dayInYear <= 90) { - dayInYear - 59 - } else if (dayInYear <= 120) { - dayInYear - 90 - } else if (dayInYear <= 151) { - dayInYear - 120 - } else if (dayInYear <= 181) { - dayInYear - 151 - } else if (dayInYear <= 212) { - dayInYear - 181 - } else if (dayInYear <= 243) { - dayInYear - 212 - } else if (dayInYear <= 273) { - dayInYear - 243 - } else if (dayInYear <= 304) { - dayInYear - 273 - } else if (dayInYear <= 334) { - dayInYear - 304 - } else { - dayInYear - 334 - } + LocalDate.ofEpochDay(date).getDayOfMonth } /** @@ -785,7 +569,7 @@ object DateTimeUtils { val absoluteYear = absoluteMonth / 12 var monthInYear = absoluteMonth - absoluteYear * 12 var date = getDateFromYear(absoluteYear) - if (monthInYear >= 2 && isLeapYear(absoluteYear + YearZero)) { + if (monthInYear >= 2 && isLeap(absoluteYear + YearZero)) { date += 1 } while (monthInYear > 0) { @@ -816,7 +600,7 @@ object DateTimeUtils { val currentMonthInYear = nonNegativeMonth % 12 val currentYear = nonNegativeMonth / 12 - val leapDay = if (currentMonthInYear == 1 && isLeapYear(currentYear + YearZero)) 1 else 0 + val leapDay = if (currentMonthInYear == 1 && isLeap(currentYear + YearZero)) 1 else 0 val lastDayOfMonth = monthDays(currentMonthInYear) + leapDay val currentDayInMonth = if (daysToMonthEnd == 0 || dayOfMonth >= lastDayOfMonth) { @@ -938,8 +722,8 @@ object DateTimeUtils { * since 1.1.1970. */ def getLastDayOfMonth(date: SQLDate): SQLDate = { - val (_, _, _, daysToMonthEnd) = splitDate(date) - date + daysToMonthEnd + val localDate = LocalDate.ofEpochDay(date) + (date - localDate.getDayOfMonth) + localDate.lengthOfMonth() } // Visible for testing. @@ -1123,11 +907,4 @@ object DateTimeUtils { def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { convertTz(time, getTimeZone(timeZone), TimeZoneGMT) } - - /** - * Re-initialize the current thread's thread locals. Exposed for testing. - */ - private[util] def resetThreadLocals(): Unit = { - threadLocalGmtCalendar.remove() - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 1374a825ec6d..4ec61e1ca4a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -23,6 +23,8 @@ import java.time.format.DateTimeParseException import java.time.temporal.TemporalQueries import java.util.{Locale, TimeZone} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros + sealed trait TimestampFormatter extends Serializable { /** * Parses a timestamp in a string and converts it to microseconds. @@ -56,12 +58,6 @@ class Iso8601TimestampFormatter( } } - private def instantToMicros(instant: Instant): Long = { - val sec = Math.multiplyExact(instant.getEpochSecond, DateTimeUtils.MICROS_PER_SECOND) - val result = Math.addExact(sec, instant.getNano / DateTimeUtils.NANOS_PER_MICROS) - result - } - override def parse(s: String): Long = instantToMicros(toInstant(s)) override def format(us: Long): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index c9d733726ff2..c3b7e19455cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.util.{Calendar, Locale, TimeZone} +import java.util.concurrent.TimeUnit import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -40,12 +41,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val pstId = Option(TimeZonePST.getID) val jstId = Option(TimeZoneJST.getID) - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - sdf.setTimeZone(TimeZoneGMT) - val sdfDate = new SimpleDateFormat("yyyy-MM-dd", Locale.US) - sdfDate.setTimeZone(TimeZoneGMT) - val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) - val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) + def toMillis(timestamp: String): Long = { + val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", TimeZoneGMT) + TimeUnit.MICROSECONDS.toMillis(tf.parse(timestamp)) + } + val date = "2015-04-08 13:10:15" + val d = new Date(toMillis(date)) + val time = "2013-11-08 13:10:15" + val ts = new Timestamp(toMillis(time)) test("datetime function current_date") { val d0 = DateTimeUtils.millisToDays(System.currentTimeMillis(), TimeZoneGMT) @@ -78,15 +81,15 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } checkEvaluation(DayOfYear(Literal.create(null, DateType)), null) - checkEvaluation(DayOfYear(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 288) - checkEvaluation(DayOfYear(Literal(new Date(sdf.parse("1582-10-04 13:10:15").getTime))), 277) + checkEvaluation(DayOfYear(Cast(Literal("1582-10-15 13:10:15"), DateType)), 288) + checkEvaluation(DayOfYear(Cast(Literal("1582-10-04 13:10:15"), DateType)), 277) checkConsistencyBetweenInterpretedAndCodegen(DayOfYear, DateType) } test("Year") { checkEvaluation(Year(Literal.create(null, DateType)), null) checkEvaluation(Year(Literal(d)), 2015) - checkEvaluation(Year(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 2015) + checkEvaluation(Year(Cast(Literal(date), DateType, gmtId)), 2015) checkEvaluation(Year(Cast(Literal(ts), DateType, gmtId)), 2013) val c = Calendar.getInstance() @@ -100,15 +103,15 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } } - checkEvaluation(Year(Literal(new Date(sdf.parse("1582-01-01 13:10:15").getTime))), 1582) - checkEvaluation(Year(Literal(new Date(sdf.parse("1581-12-31 13:10:15").getTime))), 1581) + checkEvaluation(Year(Cast(Literal("1582-01-01 13:10:15"), DateType)), 1582) + checkEvaluation(Year(Cast(Literal("1581-12-31 13:10:15"), DateType)), 1581) checkConsistencyBetweenInterpretedAndCodegen(Year, DateType) } test("Quarter") { checkEvaluation(Quarter(Literal.create(null, DateType)), null) checkEvaluation(Quarter(Literal(d)), 2) - checkEvaluation(Quarter(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 2) + checkEvaluation(Quarter(Cast(Literal(date), DateType, gmtId)), 2) checkEvaluation(Quarter(Cast(Literal(ts), DateType, gmtId)), 4) val c = Calendar.getInstance() @@ -123,20 +126,20 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } - checkEvaluation(Quarter(Literal(new Date(sdf.parse("1582-10-01 13:10:15").getTime))), 4) - checkEvaluation(Quarter(Literal(new Date(sdf.parse("1582-09-30 13:10:15").getTime))), 3) + checkEvaluation(Quarter(Cast(Literal("1582-10-01 13:10:15"), DateType)), 4) + checkEvaluation(Quarter(Cast(Literal("1582-09-30 13:10:15"), DateType)), 3) checkConsistencyBetweenInterpretedAndCodegen(Quarter, DateType) } test("Month") { checkEvaluation(Month(Literal.create(null, DateType)), null) checkEvaluation(Month(Literal(d)), 4) - checkEvaluation(Month(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 4) + checkEvaluation(Month(Cast(Literal(date), DateType, gmtId)), 4) checkEvaluation(Month(Cast(Literal(ts), DateType, gmtId)), 11) - checkEvaluation(Month(Literal(new Date(sdf.parse("1582-04-28 13:10:15").getTime))), 4) - checkEvaluation(Month(Literal(new Date(sdf.parse("1582-10-04 13:10:15").getTime))), 10) - checkEvaluation(Month(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 10) + checkEvaluation(Month(Cast(Literal("1582-04-28 13:10:15"), DateType)), 4) + checkEvaluation(Month(Cast(Literal("1582-10-04 13:10:15"), DateType)), 10) + checkEvaluation(Month(Cast(Literal("1582-10-15 13:10:15"), DateType)), 10) val c = Calendar.getInstance() (2003 to 2004).foreach { y => @@ -156,12 +159,12 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(DayOfMonth(Cast(Literal("2000-02-29"), DateType)), 29) checkEvaluation(DayOfMonth(Literal.create(null, DateType)), null) checkEvaluation(DayOfMonth(Literal(d)), 8) - checkEvaluation(DayOfMonth(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 8) + checkEvaluation(DayOfMonth(Cast(Literal(date), DateType, gmtId)), 8) checkEvaluation(DayOfMonth(Cast(Literal(ts), DateType, gmtId)), 8) - checkEvaluation(DayOfMonth(Literal(new Date(sdf.parse("1582-04-28 13:10:15").getTime))), 28) - checkEvaluation(DayOfMonth(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 15) - checkEvaluation(DayOfMonth(Literal(new Date(sdf.parse("1582-10-04 13:10:15").getTime))), 4) + checkEvaluation(DayOfMonth(Cast(Literal("1582-04-28 13:10:15"), DateType)), 28) + checkEvaluation(DayOfMonth(Cast(Literal("1582-10-15 13:10:15"), DateType)), 15) + checkEvaluation(DayOfMonth(Cast(Literal("1582-10-04 13:10:15"), DateType)), 4) val c = Calendar.getInstance() (1999 to 2000).foreach { y => @@ -179,7 +182,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(Second(Literal.create(null, DateType), gmtId).resolved === false) assert(Second(Cast(Literal(d), TimestampType, gmtId), gmtId).resolved === true) checkEvaluation(Second(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Second(Cast(Literal(sdf.format(d)), TimestampType, gmtId), gmtId), 15) + checkEvaluation(Second(Cast(Literal(date), TimestampType, gmtId), gmtId), 15) checkEvaluation(Second(Literal(ts), gmtId), 15) val c = Calendar.getInstance() @@ -200,13 +203,13 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("DayOfWeek") { checkEvaluation(DayOfWeek(Literal.create(null, DateType)), null) checkEvaluation(DayOfWeek(Literal(d)), Calendar.WEDNESDAY) - checkEvaluation(DayOfWeek(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), + checkEvaluation(DayOfWeek(Cast(Literal(date), DateType, gmtId)), Calendar.WEDNESDAY) checkEvaluation(DayOfWeek(Cast(Literal(ts), DateType, gmtId)), Calendar.FRIDAY) checkEvaluation(DayOfWeek(Cast(Literal("2011-05-06"), DateType, gmtId)), Calendar.FRIDAY) - checkEvaluation(DayOfWeek(Literal(new Date(sdf.parse("2017-05-27 13:10:15").getTime))), + checkEvaluation(DayOfWeek(Literal(new Date(toMillis("2017-05-27 13:10:15")))), Calendar.SATURDAY) - checkEvaluation(DayOfWeek(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), + checkEvaluation(DayOfWeek(Literal(new Date(toMillis("1582-10-15 13:10:15")))), Calendar.FRIDAY) checkConsistencyBetweenInterpretedAndCodegen(DayOfWeek, DateType) } @@ -214,22 +217,22 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("WeekDay") { checkEvaluation(WeekDay(Literal.create(null, DateType)), null) checkEvaluation(WeekDay(Literal(d)), 2) - checkEvaluation(WeekDay(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 2) + checkEvaluation(WeekDay(Cast(Literal(date), DateType, gmtId)), 2) checkEvaluation(WeekDay(Cast(Literal(ts), DateType, gmtId)), 4) checkEvaluation(WeekDay(Cast(Literal("2011-05-06"), DateType, gmtId)), 4) - checkEvaluation(WeekDay(Literal(new Date(sdf.parse("2017-05-27 13:10:15").getTime))), 5) - checkEvaluation(WeekDay(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 4) + checkEvaluation(WeekDay(Literal(new Date(toMillis("2017-05-27 13:10:15")))), 5) + checkEvaluation(WeekDay(Literal(new Date(toMillis("1582-10-15 13:10:15")))), 4) checkConsistencyBetweenInterpretedAndCodegen(WeekDay, DateType) } test("WeekOfYear") { checkEvaluation(WeekOfYear(Literal.create(null, DateType)), null) checkEvaluation(WeekOfYear(Literal(d)), 15) - checkEvaluation(WeekOfYear(Cast(Literal(sdfDate.format(d)), DateType, gmtId)), 15) + checkEvaluation(WeekOfYear(Cast(Literal(date), DateType, gmtId)), 15) checkEvaluation(WeekOfYear(Cast(Literal(ts), DateType, gmtId)), 45) checkEvaluation(WeekOfYear(Cast(Literal("2011-05-06"), DateType, gmtId)), 18) - checkEvaluation(WeekOfYear(Literal(new Date(sdf.parse("1582-10-15 13:10:15").getTime))), 40) - checkEvaluation(WeekOfYear(Literal(new Date(sdf.parse("1582-10-04 13:10:15").getTime))), 40) + checkEvaluation(WeekOfYear(Literal(new Date(toMillis("1582-10-15 13:10:15")))), 40) + checkEvaluation(WeekOfYear(Literal(new Date(toMillis("1582-10-04 13:10:15")))), 39) checkConsistencyBetweenInterpretedAndCodegen(WeekOfYear, DateType) } @@ -266,7 +269,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(Hour(Literal.create(null, DateType), gmtId).resolved === false) assert(Hour(Literal(ts), gmtId).resolved === true) checkEvaluation(Hour(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Hour(Cast(Literal(sdf.format(d)), TimestampType, gmtId), gmtId), 13) + checkEvaluation(Hour(Cast(Literal(date), TimestampType, gmtId), gmtId), 13) checkEvaluation(Hour(Literal(ts), gmtId), 13) val c = Calendar.getInstance() @@ -293,7 +296,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { assert(Minute(Literal(ts), gmtId).resolved === true) checkEvaluation(Minute(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) checkEvaluation( - Minute(Cast(Literal(sdf.format(d)), TimestampType, gmtId), gmtId), 10) + Minute(Cast(Literal(date), TimestampType, gmtId), gmtId), 10) checkEvaluation(Minute(Literal(ts), gmtId), 10) val c = Calendar.getInstance() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala index 4281c89ac475..555ccb892497 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala @@ -183,7 +183,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkHiveHashForDateType("2017-01-01", 17167) // boundary cases - checkHiveHashForDateType("0000-01-01", -719530) + checkHiveHashForDateType("0000-01-01", -719528) checkHiveHashForDateType("9999-12-31", 2932896) // epoch @@ -226,7 +226,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { TimeZone.getTimeZone("US/Pacific")) // boundary cases - checkHiveHashForTimestampType("0001-01-01 00:00:00", 1645926784) + checkHiveHashForTimestampType("0001-01-01 00:00:00", 1645969984) checkHiveHashForTimestampType("9999-01-01 00:00:00", -1081818240) // epoch diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index 66d8d28988f8..442d78598a38 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -40,12 +40,10 @@ object DateTimeTestUtils { def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = { val originalDefaultTimeZone = TimeZone.getDefault try { - DateTimeUtils.resetThreadLocals() TimeZone.setDefault(newDefaultTimeZone) block } finally { TimeZone.setDefault(originalDefaultTimeZone) - DateTimeUtils.resetThreadLocals() } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index e732eb0ef981..ef34150fa623 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.time.LocalDate import java.util.{Calendar, Locale, TimeZone} +import java.util.concurrent.TimeUnit import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeUtils._ @@ -29,9 +31,9 @@ class DateTimeUtilsSuite extends SparkFunSuite { val TimeZonePST = TimeZone.getTimeZone("PST") - private[this] def getInUTCDays(timestamp: Long): Int = { - val tz = TimeZone.getDefault - ((timestamp + tz.getOffset(timestamp)) / MILLIS_PER_DAY).toInt + private[this] def getInUTCDays(localDate: LocalDate): Int = { + val epochSeconds = localDate.atStartOfDay(TimeZoneUTC.toZoneId).toEpochSecond + TimeUnit.SECONDS.toDays(epochSeconds).toInt } test("nanoseconds truncation") { @@ -131,8 +133,10 @@ class DateTimeUtilsSuite extends SparkFunSuite { millisToDays(c.getTimeInMillis)) c.set(1, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) + val localDate = LocalDate.of(1, 1, 1) + .atStartOfDay(TimeZoneUTC.toZoneId) assert(stringToDate(UTF8String.fromString("0001")).get === - millisToDays(c.getTimeInMillis)) + TimeUnit.SECONDS.toDays(localDate.toEpochSecond)) c = Calendar.getInstance() c.set(2015, 2, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) @@ -157,38 +161,6 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(stringToDate(UTF8String.fromString("02015")).isEmpty) } - test("string to time") { - // Tests with UTC. - val c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) - c.set(Calendar.MILLISECOND, 0) - - c.set(1900, 0, 1, 0, 0, 0) - assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === c.getTime()) - - c.set(2000, 11, 30, 10, 0, 0) - assert(stringToTime("2000-12-30T10:00:00Z") === c.getTime()) - - // Tests with set time zone. - c.setTimeZone(TimeZone.getTimeZone("GMT-04:00")) - c.set(Calendar.MILLISECOND, 0) - - c.set(1900, 0, 1, 0, 0, 0) - assert(stringToTime("1900-01-01T00:00:00-04:00") === c.getTime()) - - c.set(1900, 0, 1, 0, 0, 0) - assert(stringToTime("1900-01-01T00:00:00GMT-04:00") === c.getTime()) - - // Tests with local time zone. - c.setTimeZone(TimeZone.getDefault()) - c.set(Calendar.MILLISECOND, 0) - - c.set(2000, 11, 30, 0, 0, 0) - assert(stringToTime("2000-12-30") === new Date(c.getTimeInMillis())) - - c.set(2000, 11, 30, 10, 0, 0) - assert(stringToTime("2000-12-30 10:00:00") === new Timestamp(c.getTimeInMillis())) - } - test("string to timestamp") { for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { def checkStringToTimestamp(str: String, expected: Option[Long]): Unit = { @@ -201,7 +173,9 @@ class DateTimeUtilsSuite extends SparkFunSuite { checkStringToTimestamp("1969-12-31 16:00:00", Option(c.getTimeInMillis * 1000)) c.set(1, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) - checkStringToTimestamp("0001", Option(c.getTimeInMillis * 1000)) + val date = LocalDate.of(1, 1, 1) + .atStartOfDay(tz.toZoneId) + checkStringToTimestamp("0001", Option(TimeUnit.SECONDS.toMicros(date.toEpochSecond))) c = Calendar.getInstance(tz) c.set(2015, 2, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) @@ -404,73 +378,47 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("get day in year") { - val c = Calendar.getInstance() - c.set(2015, 2, 18, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 77) - c.set(2012, 2, 18, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78) + assert(getDayInYear(getInUTCDays(LocalDate.of(2015, 3, 18))) === 77) + assert(getDayInYear(getInUTCDays(LocalDate.of(2012, 3, 18))) === 78) } - test("SPARK-26002: correct day of year calculations for Julian calendar years") { - val c = Calendar.getInstance() - c.set(Calendar.MILLISECOND, 0) - (1000 to 1600 by 100).foreach { year => + test("day of year calculations for old years") { + var date = LocalDate.of(1582, 3, 1) + assert(getDayInYear(getInUTCDays(date)) === 60) + + (1000 to 1600 by 10).foreach { year => // January 1 is the 1st day of year. - c.set(year, 0, 1, 0, 0, 0) - assert(getYear(getInUTCDays(c.getTimeInMillis)) === year) - assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 1) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 1) - - // March 1 is the 61st day of the year as they are leap years. It is true for - // even the multiples of 100 as before 1582-10-4 the Julian calendar leap year calculation - // is used in which every multiples of 4 are leap years - c.set(year, 2, 1, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 61) - assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3) - - // testing leap day (February 29) in leap years - c.set(year, 1, 29, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) - - // For non-leap years: - c.set(year + 1, 2, 1, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + date = LocalDate.of(year, 1, 1) + assert(getYear(getInUTCDays(date)) === year) + assert(getMonth(getInUTCDays(date)) === 1) + assert(getDayInYear(getInUTCDays(date)) === 1) + + // December 31 is the 1st day of year. + date = LocalDate.of(year, 12, 31) + assert(getYear(getInUTCDays(date)) === year) + assert(getMonth(getInUTCDays(date)) === 12) + assert(getDayOfMonth(getInUTCDays(date)) === 31) } - - c.set(1582, 2, 1, 0, 0, 0) - assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) } test("get year") { - val c = Calendar.getInstance() - c.set(2015, 2, 18, 0, 0, 0) - assert(getYear(getInUTCDays(c.getTimeInMillis)) === 2015) - c.set(2012, 2, 18, 0, 0, 0) - assert(getYear(getInUTCDays(c.getTimeInMillis)) === 2012) + assert(getYear(getInUTCDays(LocalDate.of(2015, 2, 18))) === 2015) + assert(getYear(getInUTCDays(LocalDate.of(2012, 2, 18))) === 2012) } test("get quarter") { - val c = Calendar.getInstance() - c.set(2015, 2, 18, 0, 0, 0) - assert(getQuarter(getInUTCDays(c.getTimeInMillis)) === 1) - c.set(2012, 11, 18, 0, 0, 0) - assert(getQuarter(getInUTCDays(c.getTimeInMillis)) === 4) + assert(getQuarter(getInUTCDays(LocalDate.of(2015, 2, 18))) === 1) + assert(getQuarter(getInUTCDays(LocalDate.of(2012, 11, 18))) === 4) } test("get month") { - val c = Calendar.getInstance() - c.set(2015, 2, 18, 0, 0, 0) - assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3) - c.set(2012, 11, 18, 0, 0, 0) - assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 12) + assert(getMonth(getInUTCDays(LocalDate.of(2015, 3, 18))) === 3) + assert(getMonth(getInUTCDays(LocalDate.of(2012, 12, 18))) === 12) } test("get day of month") { - val c = Calendar.getInstance() - c.set(2015, 2, 18, 0, 0, 0) - assert(getDayOfMonth(getInUTCDays(c.getTimeInMillis)) === 18) - c.set(2012, 11, 24, 0, 0, 0) - assert(getDayOfMonth(getInUTCDays(c.getTimeInMillis)) === 24) + assert(getDayOfMonth(getInUTCDays(LocalDate.of(2015, 3, 18))) === 18) + assert(getDayOfMonth(getInUTCDays(LocalDate.of(2012, 12, 24))) === 24) } test("date add months") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 54c77dddc352..ce1dc6e159c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.datasources.parquet import java.io.File -import java.sql.Timestamp import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.parquet.hadoop.ParquetOutputFormat @@ -187,12 +186,12 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext sql("insert into ts values (1, '2016-01-01 10:11:12.123456')") sql("insert into ts values (2, null)") sql("insert into ts values (3, '1965-01-01 10:11:12.123456')") - checkAnswer( - sql("select * from ts"), - Seq( - Row(1, Timestamp.valueOf("2016-01-01 10:11:12.123456")), - Row(2, null), - Row(3, Timestamp.valueOf("1965-01-01 10:11:12.123456")))) + val expected = Seq( + (1, "2016-01-01 10:11:12.123456"), + (2, null), + (3, "1965-01-01 10:11:12.123456")) + .toDS().select('_1, $"_2".cast("timestamp")) + checkAnswer(sql("select * from ts"), expected) } // The microsecond portion is truncated when written as TIMESTAMP_MILLIS. @@ -206,30 +205,30 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext sql("insert into ts values (5, '1965-01-01 10:11:12.1')") sql("insert into ts values (6, '1965-01-01 10:11:12.123456789')") sql("insert into ts values (7, '0001-01-01 00:00:00.000000')") - checkAnswer( - sql("select * from ts"), - Seq( - Row(1, Timestamp.valueOf("2016-01-01 10:11:12.123")), - Row(2, null), - Row(3, Timestamp.valueOf("1965-01-01 10:11:12.125")), - Row(4, Timestamp.valueOf("1965-01-01 10:11:12.125")), - Row(5, Timestamp.valueOf("1965-01-01 10:11:12.1")), - Row(6, Timestamp.valueOf("1965-01-01 10:11:12.123")), - Row(7, Timestamp.valueOf("0001-01-01 00:00:00.000")))) + val expected = Seq( + (1, "2016-01-01 10:11:12.123"), + (2, null), + (3, "1965-01-01 10:11:12.125"), + (4, "1965-01-01 10:11:12.125"), + (5, "1965-01-01 10:11:12.1"), + (6, "1965-01-01 10:11:12.123"), + (7, "0001-01-01 00:00:00.000")) + .toDS().select('_1, $"_2".cast("timestamp")) + checkAnswer(sql("select * from ts"), expected) // Read timestamps that were encoded as TIMESTAMP_MILLIS annotated as INT64 // with PARQUET_INT64_AS_TIMESTAMP_MILLIS set to false. withSQLConf(SQLConf.PARQUET_INT64_AS_TIMESTAMP_MILLIS.key -> "false") { - checkAnswer( - sql("select * from ts"), - Seq( - Row(1, Timestamp.valueOf("2016-01-01 10:11:12.123")), - Row(2, null), - Row(3, Timestamp.valueOf("1965-01-01 10:11:12.125")), - Row(4, Timestamp.valueOf("1965-01-01 10:11:12.125")), - Row(5, Timestamp.valueOf("1965-01-01 10:11:12.1")), - Row(6, Timestamp.valueOf("1965-01-01 10:11:12.123")), - Row(7, Timestamp.valueOf("0001-01-01 00:00:00.000")))) + val expected = Seq( + (1, "2016-01-01 10:11:12.123"), + (2, null), + (3, "1965-01-01 10:11:12.125"), + (4, "1965-01-01 10:11:12.125"), + (5, "1965-01-01 10:11:12.1"), + (6, "1965-01-01 10:11:12.123"), + (7, "0001-01-01 00:00:00.000")) + .toDS().select('_1, $"_2".cast("timestamp")) + checkAnswer(sql("select * from ts"), expected) } } }