Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -335,48 +335,6 @@ case class SecondWithFraction(child: Expression, timeZoneId: Option[String] = No
}
}

case class Milliseconds(child: Expression, timeZoneId: Option[String] = None)
extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression {

override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
// DecimalType is used here to not lose precision while converting microseconds to
// the fractional part of milliseconds. Scale 3 is taken to have all microseconds as
// the fraction. The precision 8 should cover 2 digits for seconds, 3 digits for
// milliseconds and 3 digits for microseconds.
override def dataType: DataType = DecimalType(8, 3)
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))

override protected def nullSafeEval(timestamp: Any): Any = {
DateTimeUtils.getMilliseconds(timestamp.asInstanceOf[Long], zoneId)
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getMilliseconds($c, $zid)")
}
}

case class Microseconds(child: Expression, timeZoneId: Option[String] = None)
extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression {

override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
override def dataType: DataType = IntegerType
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))

override protected def nullSafeEval(timestamp: Any): Any = {
DateTimeUtils.getMicroseconds(timestamp.asInstanceOf[Long], zoneId)
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getMicroseconds($c, $zid)")
}
}

@ExpressionDescription(
usage = "_FUNC_(date) - Returns the day of year of the date/timestamp.",
examples = """
Expand Down Expand Up @@ -427,19 +385,19 @@ case class Year(child: Expression) extends UnaryExpression with ImplicitCastInpu
}
}

case class IsoYear(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
case class YearOfWeek(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

override def dataType: DataType = IntegerType

override protected def nullSafeEval(date: Any): Any = {
DateTimeUtils.getIsoYear(date.asInstanceOf[Int])
DateTimeUtils.getWeekBasedYear(date.asInstanceOf[Int])
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getIsoYear($c)")
defineCodeGen(ctx, ev, c => s"$dtu.getWeekBasedYear($c)")
}
}

Expand Down Expand Up @@ -2033,108 +1991,26 @@ case class MakeTimestamp(
override def prettyName: String = "make_timestamp"
}

case class Millennium(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

override def dataType: DataType = IntegerType

override protected def nullSafeEval(date: Any): Any = {
DateTimeUtils.getMillennium(date.asInstanceOf[Int])
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getMillennium($c)")
}
}

case class Century(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

override def dataType: DataType = IntegerType

override protected def nullSafeEval(date: Any): Any = {
DateTimeUtils.getCentury(date.asInstanceOf[Int])
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getCentury($c)")
}
}

case class Decade(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType)

override def dataType: DataType = IntegerType

override protected def nullSafeEval(date: Any): Any = {
DateTimeUtils.getDecade(date.asInstanceOf[Int])
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, c => s"$dtu.getDecade($c)")
}
}

case class Epoch(child: Expression, timeZoneId: Option[String] = None)
extends UnaryExpression with ImplicitCastInputTypes with TimeZoneAwareExpression {

override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
// DecimalType is used to not lose precision while converting microseconds to
// the fractional part of seconds. Scale 6 is taken to have all microseconds as
// the fraction. The precision 20 should cover whole valid range of years [1, 9999]
// plus negative years that can be used in some cases though are not officially supported.
override def dataType: DataType = DecimalType(20, 6)
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))

override protected def nullSafeEval(timestamp: Any): Any = {
DateTimeUtils.getEpoch(timestamp.asInstanceOf[Long], zoneId)
}

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
defineCodeGen(ctx, ev, c => s"$dtu.getEpoch($c, $zid)")
}
}

object DatePart {

def parseExtractField(
extractField: String,
source: Expression,
errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match {
case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => Millennium(source)
case "CENTURY" | "CENTURIES" | "C" | "CENT" => Century(source)
case "DECADE" | "DECADES" | "DEC" | "DECS" => Decade(source)
case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source)
case "ISOYEAR" => IsoYear(source)
case "YEAROFWEEK" => YearOfWeek(source)
case "QUARTER" | "QTR" => Quarter(source)
case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source)
case "WEEK" | "W" | "WEEKS" => WeekOfYear(source)
case "DAY" | "D" | "DAYS" => DayOfMonth(source)
case "DAYOFWEEK" | "DOW" => DayOfWeek(source)
case "ISODOW" => Add(WeekDay(source), Literal(1))
case "DAYOFWEEK_ISO" | "DOW_ISO" => Add(WeekDay(source), Literal(1))
Copy link
Member

@dongjoon-hyun dongjoon-hyun Apr 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike the PR title, this PR is adding DAYOFWEEK_ISO newly. Which platform are you referring for DAYOFWEEK_ISO? IIRC, @gatorsmile commented that we should not consider IBM DB i.

isodow is PostgreSQL specific but iso as a suffix is more commonly used across platforms

Copy link
Member Author

@yaooqinn yaooqinn Apr 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @dongjoon-hyun, for historical reasons, we have [dayofweek, dow] implemented for representing a non-ISO day-of-week and a newly added isodow from PostgreSQL for ISO day-of-week. Many other systems only have one week-numbering system support and use either full names or abbreviations. Things in spark become a little bit complicated.

  1. because of the existence of isodow, so we need to add iso-prefix to dayofweek to make a pair for it too. [dayofweek, isodayofweek, dow and isodow]
  2. because there are rare iso-prefixed systems and more systems choose iso-suffixed way, so we may result in [dayofweek, dayofweekiso, dow, dowiso]
  3. dayofweekiso looks nice and has use cases in the platforms listed above, e.g. snowflake, but dowiso looks weird and no use cases found.
  4. after a discussion with @cloud-fan, we have both agreed with an underscore before iso may look much better because isodow is new and there is no standard for iso kind of things, so this may be good for us to make it simple and clear for end-users if they are well documented too.

Thus, we finally result in [dayofweek, dow] for Non-ISO week-numbering system and [dayofweek_iso, dow_iso] for ISO system

Copy link
Member

@dongjoon-hyun dongjoon-hyun Apr 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yaooqinn . Ya. It's clear for that part. Could you enumerate the name of systems which supports DAYOFWEEK_ISO? (except IBM DB i). I'm wondering that specifically.

It's because you wrote like Many other systems in the PR description.

Copy link
Member Author

@yaooqinn yaooqinn Apr 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifically, snowflake use dayofweek_iso except db2. I couldn't find more because most platforms do not have two day-of-week system implemented.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. Thanks. I understand that Snowflake is considered important. Could you add some of the above comment(#28284 (comment)) to the PR description then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks @dongjoon-hyun. PR description updated

case "DOY" => DayOfYear(source)
case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source)
case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => Minute(source)
case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => SecondWithFraction(source)
case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" =>
Milliseconds(source)
case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" =>
Microseconds(source)
case "EPOCH" => Epoch(source)
case _ => errorHandleFunc
}
}

object DatePartLike {

def toEquivalentExpr(field: Expression, source: Expression): Expression = {
if (!field.foldable) {
Expand Down Expand Up @@ -2192,7 +2068,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
extends RuntimeReplaceable {

def this(field: Expression, source: Expression) = {
this(field, source, DatePartLike.toEquivalentExpr(field, source))
this(field, source, DatePart.toEquivalentExpr(field, source))
}

override def flatArguments: Iterator[Any] = Iterator(field, source)
Expand All @@ -2206,26 +2082,20 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
arguments = """
Arguments:
* field - selects which part of the source should be extracted
- Supported string values of `field` for dates and timestamps are:
- "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia
- "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries
- "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 10
- Supported string values of `field` for dates and timestamps are(case insensitive):
- "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field
- "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in
- "YEAROFWEEK" - the ISO 8601 week-numbering year that the datetime falls in. For example, 2005-01-02 is part of the 53rd week of year 2004, so the result is 2004
- "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in
- "MONTH", ("MON", "MONS", "MONTHS") - the month field (1 - 12)
- "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013
- "DAY", ("D", "DAYS") - the day of the month field (1 - 31)
- "DAYOFWEEK",("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7)
- "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7)
- "DAYOFWEEK_ISO",("DOW_ISO") - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7)
- "DOY" - the day of the year (1 - 365/366)
- "HOUR", ("H", "HOURS", "HR", "HRS") - The hour field (0 - 23)
- "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59)
- "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts
- "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds
- "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - The seconds field, including fractional parts, multiplied by 1000000. Note that this includes full seconds
- "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative)
- Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are:
- Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are(case insensitive):
- "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12
- "MONTH", ("MON", "MONS", "MONTHS") - the total `months` % 12
- "DAY", ("D", "DAYS") - the `days` part of interval
Expand Down Expand Up @@ -2258,7 +2128,7 @@ case class Extract(field: Expression, source: Expression, child: Expression)
extends RuntimeReplaceable {

def this(field: Expression, source: Expression) = {
this(field, source, DatePartLike.toEquivalentExpr(field, source))
this(field, source, DatePart.toEquivalentExpr(field, source))
}

override def flatArguments: Iterator[Any] = Iterator(field, source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,14 +526,6 @@ object DateTimeUtils {
Decimal(getMicroseconds(microsec, zoneId), 8, 6)
}

/**
* Returns seconds, including fractional parts, multiplied by 1000. The timestamp
* is expressed in microseconds since the epoch.
*/
def getMilliseconds(timestamp: SQLTimestamp, zoneId: ZoneId): Decimal = {
Decimal(getMicroseconds(timestamp, zoneId), 8, 3)
}

/**
* Returns seconds, including fractional parts, multiplied by 1000000. The timestamp
* is expressed in microseconds since the epoch.
Expand All @@ -551,24 +543,6 @@ object DateTimeUtils {
LocalDate.ofEpochDay(date).getDayOfYear
}

private def extractFromYear(date: SQLDate, divider: Int): Int = {
val localDate = daysToLocalDate(date)
val yearOfEra = localDate.get(ChronoField.YEAR_OF_ERA)
var result = yearOfEra / divider
if ((yearOfEra % divider) != 0 || yearOfEra <= 1) result += 1
if (localDate.get(ChronoField.ERA) == 0) result = -result
result
}

/** Returns the millennium for the given date. The date is expressed in days since 1.1.1970. */
def getMillennium(date: SQLDate): Int = extractFromYear(date, 1000)

/** Returns the century for the given date. The date is expressed in days since 1.1.1970. */
def getCentury(date: SQLDate): Int = extractFromYear(date, 100)

/** Returns the decade for the given date. The date is expressed in days since 1.1.1970. */
def getDecade(date: SQLDate): Int = Math.floorDiv(getYear(date), 10)

/**
* Returns the year value for the given date. The date is expressed in days
* since 1.1.1970.
Expand All @@ -581,7 +555,7 @@ object DateTimeUtils {
* Returns the year which conforms to ISO 8601. Each ISO 8601 week-numbering
* year begins with the Monday of the week containing the 4th of January.
*/
def getIsoYear(date: SQLDate): Int = {
def getWeekBasedYear(date: SQLDate): Int = {
daysToLocalDate(date).get(IsoFields.WEEK_BASED_YEAR)
}

Expand Down Expand Up @@ -863,17 +837,6 @@ object DateTimeUtils {
convertTz(time, getZoneId(timeZone), ZoneOffset.UTC)
}

/**
* Returns the number of seconds with fractional part in microsecond precision
* since 1970-01-01 00:00:00 local time.
*/
def getEpoch(timestamp: SQLTimestamp, zoneId: ZoneId): Decimal = {
val offset = SECONDS.toMicros(
zoneId.getRules.getOffset(microsToInstant(timestamp)).getTotalSeconds)
val sinceEpoch = timestamp + offset
Decimal(sinceEpoch, 20, 6)
}

def currentTimestamp(): SQLTimestamp = instantToMicros(Instant.now())

def currentDate(zoneId: ZoneId): SQLDate = localDateToDays(LocalDate.now(zoneId))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1032,94 +1032,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001"))
}

test("millennium") {
val date = MakeDate(Literal(2019), Literal(1), Literal(1))
checkEvaluation(Millennium(date), 3)
checkEvaluation(Millennium(date.copy(year = Literal(2001))), 3)
checkEvaluation(Millennium(date.copy(year = Literal(2000))), 2)
checkEvaluation(Millennium(date.copy(year = Literal(1001), day = Literal(28))), 2)
checkEvaluation(Millennium(date.copy(year = Literal(1))), 1)
checkEvaluation(Millennium(date.copy(year = Literal(-1))), -1)
checkEvaluation(Millennium(date.copy(year = Literal(-100), month = Literal(12))), -1)
checkEvaluation(Millennium(date.copy(year = Literal(-2019))), -3)
}

test("century") {
val date = MakeDate(Literal(2019), Literal(1), Literal(1))
checkEvaluation(Century(date), 21)
checkEvaluation(Century(date.copy(year = Literal(2001))), 21)
checkEvaluation(Century(date.copy(year = Literal(2000))), 20)
checkEvaluation(Century(date.copy(year = Literal(1001), day = Literal(28))), 11)
checkEvaluation(Century(date.copy(year = Literal(1))), 1)
checkEvaluation(Century(date.copy(year = Literal(-1))), -1)
checkEvaluation(Century(date.copy(year = Literal(-100), month = Literal(12))), -2)
checkEvaluation(Century(date.copy(year = Literal(-2019))), -21)
}

test("decade") {
val date = MakeDate(Literal(2019), Literal(8), Literal(8))
checkEvaluation(Decade(date), 201)
checkEvaluation(Decade(date.copy(year = Literal(2011))), 201)
checkEvaluation(Decade(date.copy(year = Literal(2010))), 201)
checkEvaluation(Decade(date.copy(year = Literal(2009))), 200)
checkEvaluation(Decade(date.copy(year = Literal(10))), 1)
checkEvaluation(Decade(date.copy(year = Literal(1))), 0)
checkEvaluation(Decade(date.copy(year = Literal(-1))), -1)
checkEvaluation(Decade(date.copy(year = Literal(-10))), -1)
checkEvaluation(Decade(date.copy(year = Literal(-11))), -2)
checkEvaluation(Decade(date.copy(year = Literal(-2019))), -202)
}

test("milliseconds and microseconds") {
outstandingTimezonesIds.foreach { timezone =>
var timestamp = MakeTimestamp(Literal(2019), Literal(8), Literal(10),
Literal(0), Literal(0), Literal(Decimal(BigDecimal(10.123456789), 8, 6)),
Some(Literal(timezone)), Some(timezone))
def millis(ts: MakeTimestamp): Milliseconds = Milliseconds(timestamp, Some(timezone))
def micros(ts: MakeTimestamp): Microseconds = Microseconds(timestamp, Some(timezone))

checkEvaluation(millis(timestamp), Decimal(BigDecimal(10123.457), 8, 3))
checkEvaluation(
millis(timestamp.copy(year = Literal(10))),
Decimal(BigDecimal(10123.457), 8, 3))

checkEvaluation(micros(timestamp), 10123457)
checkEvaluation(
micros(timestamp.copy(year = Literal(10))),
10123457)

timestamp = timestamp.copy(sec = Literal(Decimal(0.0, 8, 6)))
checkEvaluation(millis(timestamp), Decimal(0, 8, 3))
checkEvaluation(micros(timestamp), 0)

timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(59.999999), 8, 6)))
checkEvaluation(millis(timestamp), Decimal(BigDecimal(59999.999), 8, 3))
checkEvaluation(micros(timestamp), 59999999)

timestamp = timestamp.copy(sec = Literal(Decimal(BigDecimal(60.0), 8, 6)))
checkEvaluation(millis(timestamp), Decimal(0, 8, 3))
checkEvaluation(micros(timestamp), 0)
}
}

test("epoch") {
val zoneId = ZoneId.systemDefault()
val nanos = 123456000
val timestamp = Epoch(MakeTimestamp(
Literal(2019), Literal(8), Literal(9), Literal(0), Literal(0),
Literal(Decimal(nanos / NANOS_PER_SECOND.toDouble, 8, 6)),
Some(Literal(zoneId.getId))))
val instant = LocalDateTime.of(2019, 8, 9, 0, 0, 0, nanos)
.atZone(zoneId).toInstant
val expected = Decimal(BigDecimal(nanos) / NANOS_PER_SECOND +
instant.getEpochSecond +
zoneId.getRules.getOffset(instant).getTotalSeconds)
checkEvaluation(timestamp, expected)
}

test("ISO 8601 week-numbering year") {
checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(1))), 2005)
checkEvaluation(IsoYear(MakeDate(Literal(2006), Literal(1), Literal(2))), 2006)
checkEvaluation(YearOfWeek(MakeDate(Literal(2006), Literal(1), Literal(1))), 2005)
checkEvaluation(YearOfWeek(MakeDate(Literal(2006), Literal(1), Literal(2))), 2006)
}

test("extract the seconds part with fraction from timestamps") {
Expand Down
Loading