diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 5ae75dc93930..f590c63f80b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -53,14 +53,30 @@ object DateTimeUtils { final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L - // number of days in 400 years + // number of days in 400 years by Gregorian calendar final val daysIn400Years: Int = 146097 + + // In the Julian calendar every year that is exactly divisible by 4 is a leap year without any + // exception. But in the Gregorian calendar every year that is exactly divisible by four + // is a leap year, except for years that are exactly divisible by 100, but these centurial years + // are leap years if they are exactly divisible by 400. + // So there are 3 extra days in the Julian calendar within a 400 years cycle compared to the + // Gregorian calendar. + final val extraLeapDaysIn400YearsJulian = 3 + + // number of days in 400 years by Julian calendar + final val daysIn400YearsInJulian: Int = daysIn400Years + extraLeapDaysIn400YearsJulian + // number of days between 1.1.1970 and 1.1.2001 final val to2001 = -11323 // this is year -17999, calculation: 50 * daysIn400Year final val YearZero = -17999 final val toYearZero = to2001 + 7304850 + + // days to year -17999 in Julian calendar + final val toYearZeroInJulian = toYearZero + 49 * extraLeapDaysIn400YearsJulian + final val TimeZoneGMT = TimeZone.getTimeZone("GMT") final val TimeZoneUTC = TimeZone.getTimeZone("UTC") final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12) @@ -585,20 +601,30 @@ object DateTimeUtils { * Return the number of days since the start of 400 year period. * The second year of a 400 year period (year 1) starts on day 365. */ - private[this] def yearBoundary(year: Int): Int = { - year * 365 + ((year / 4 ) - (year / 100) + (year / 400)) + private[this] def yearBoundary(year: Int, isGregorian: Boolean): Int = { + if (isGregorian) { + year * 365 + ((year / 4) - (year / 100) + (year / 400)) + } else { + year * 365 + (year / 4) + } } /** * Calculates the number of years for the given number of days. This depends * on a 400 year period. * @param days days since the beginning of the 400 year period + * @param isGregorian indicates whether leap years should be calculated according to Gregorian + * (or Julian) calendar * @return (number of year, days in year) */ - private[this] def numYears(days: Int): (Int, Int) = { + private[this] def numYears(days: Int, isGregorian: Boolean): (Int, Int) = { val year = days / 365 - val boundary = yearBoundary(year) - if (days > boundary) (year, days - boundary) else (year - 1, days - yearBoundary(year - 1)) + val boundary = yearBoundary(year, isGregorian) + if (days > boundary) { + (year, days - boundary) + } else { + (year - 1, days - yearBoundary(year - 1, isGregorian)) + } } /** @@ -609,18 +635,26 @@ object DateTimeUtils { * equals to the period 1.1.1601 until 31.12.2000. */ private[this] def getYearAndDayInYear(daysSince1970: SQLDate): (Int, Int) = { - // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) - var daysSince1970Tmp = daysSince1970 // Since Julian calendar was replaced with the Gregorian calendar, // the 10 days after Oct. 4 were skipped. // (1582-10-04) -141428 days since 1970-01-01 if (daysSince1970 <= -141428) { - daysSince1970Tmp -= 10 + getYearAndDayInYear(daysSince1970 - 10, toYearZeroInJulian, daysIn400YearsInJulian, false) + } else { + getYearAndDayInYear(daysSince1970, toYearZero, daysIn400Years, true) } - val daysNormalized = daysSince1970Tmp + toYearZero + } + + private def getYearAndDayInYear( + daysSince1970: SQLDate, + toYearZero: SQLDate, + daysIn400Years: SQLDate, + isGregorian: Boolean): (Int, Int) = { + // add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999) + val daysNormalized = daysSince1970 + toYearZero val numOfQuarterCenturies = daysNormalized / daysIn400Years val daysInThis400 = daysNormalized % daysIn400Years + 1 - val (years, dayInYear) = numYears(daysInThis400) + val (years, dayInYear) = numYears(daysInThis400, isGregorian) val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years (year, dayInYear) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 0182eeb17121..2cb6110e2c09 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -410,6 +410,36 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78) } + test("SPARK-26002: correct day of year calculations for Julian calendar years") { + val c = Calendar.getInstance() + c.set(Calendar.MILLISECOND, 0) + (1000 to 1600 by 100).foreach { year => + // January 1 is the 1st day of year. + c.set(year, 0, 1, 0, 0, 0) + assert(getYear(getInUTCDays(c.getTimeInMillis)) === year) + assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 1) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 1) + + // March 1 is the 61st day of the year as they are leap years. It is true for + // even the multiples of 100 as before 1582-10-4 the Julian calendar leap year calculation + // is used in which every multiples of 4 are leap years + c.set(year, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 61) + assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3) + + // testing leap day (February 29) in leap years + c.set(year, 1, 29, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + + // For non-leap years: + c.set(year + 1, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + } + + c.set(1582, 2, 1, 0, 0, 0) + assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60) + } + test("get year") { val c = Calendar.getInstance() c.set(2015, 2, 18, 0, 0, 0) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 547c2bef02b2..8bd8bc2b94b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -27,3 +27,5 @@ select current_date = current_date(), current_timestamp = current_timestamp(), a select a, b from ttf2 order by a, current_date; select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15'); + +select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 63aa00426ea3..2090633802e2 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 11 -- !query 0 @@ -89,3 +89,11 @@ select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), week struct -- !query 9 output 5 3 5 NULL 4 + + +-- !query 10 +select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01') +-- !query 10 schema +struct +-- !query 10 output +1500 1 1