-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-8176] [SPARK-8197] [SQL] function to_date/ trunc #6988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -272,6 +272,26 @@ case class LastDay(startDate: Expression) extends UnaryExpression with ImplicitC | |
| override def prettyName: String = "last_day" | ||
| } | ||
|
|
||
| /** | ||
| * Returns the date part of a timestamp string. | ||
| */ | ||
| case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { | ||
|
|
||
| // Implicit casting of spark will accept string in both date and timestamp format, as | ||
| // well as TimestampType. | ||
| override def inputTypes: Seq[AbstractDataType] = Seq(DateType) | ||
|
|
||
| override def dataType: DataType = DateType | ||
|
|
||
| override def eval(input: InternalRow): Any = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll delete
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, sounds reasonable to me. |
||
| child.eval(input) | ||
| } | ||
|
|
||
| override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. override the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could override |
||
| defineCodeGen(ctx, ev, (time) => time) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns the first date which is later than startDate and named as dayOfWeek. | ||
| * For example, NextDay(2015-07-27, Sunday) would return 2015-08-02, which is the first | ||
|
|
@@ -283,6 +303,7 @@ case class NextDay(startDate: Expression, dayOfWeek: Expression) | |
| extends BinaryExpression with ImplicitCastInputTypes { | ||
|
|
||
| override def left: Expression = startDate | ||
|
|
||
| override def right: Expression = dayOfWeek | ||
|
|
||
| override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) | ||
|
|
@@ -330,3 +351,88 @@ case class NextDay(startDate: Expression, dayOfWeek: Expression) | |
|
|
||
| override def prettyName: String = "next_day" | ||
| } | ||
|
|
||
/**
 * Returns date truncated to the unit specified by the format.
 *
 * The result is null when the date is null, when the format is null, or when
 * `DateTimeUtils.getFmt` reports the format as unknown (signalled by -1).
 */
case class Trunc(date: Expression, format: Expression)
  extends BinaryExpression with ImplicitCastInputTypes {
  override def left: Expression = date
  override def right: Expression = format

  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType)
  override def dataType: DataType = DateType

  // Evaluated without an input row, so it must only be forced when `format` is foldable.
  lazy val constFmt = format.eval().asInstanceOf[UTF8String]

  // Truncation level of the constant format; only meaningful when `format` is foldable.
  private lazy val constLevel: Int = levelOf(constFmt)

  /** Maps a format value to its truncation level, treating null like an unknown format (-1). */
  private def levelOf(fmt: UTF8String): Int =
    if (fmt == null) -1 else DateTimeUtils.getFmt(fmt)

  override def eval(input: InternalRow): Any = {
    val level =
      if (format.foldable) {
        // Constant format: resolved once and reused for every row.
        constLevel
      } else {
        levelOf(format.eval(input).asInstanceOf[UTF8String])
      }
    if (level == -1) {
      // null or unknown format
      null
    } else {
      val d = date.eval(input)
      if (d == null) {
        null
      } else {
        DateTimeUtils.dateTrunc(d.asInstanceOf[Int], level)
      }
    }
  }

  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
    val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
    // The constant-format fast path depends on `format` being foldable (not `date`):
    // `constFmt` evaluates `format` without an input row.
    if (format.foldable) {
      if (constLevel == -1) {
        // null or unknown constant format: the result is always null
        s"""
          boolean ${ev.isNull} = true;
          ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
        """
      } else {
        // The level is known at code-generation time, so it is inlined as a literal and
        // the generated code only has to null-check the date.
        val d = date.gen(ctx)
        s"""
          ${d.code}
          boolean ${ev.isNull} = ${d.isNull};
          ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
          if (!${ev.isNull}) {
            ${ev.primitive} = $dtu.dateTrunc(${d.primitive}, $constLevel);
          }
        """
      }
    } else {
      nullSafeCodeGen(ctx, ev, (dateVal, fmt) => {
        val form = ctx.freshName("form")
        s"""
          int $form = $dtu.getFmt($fmt);
          if ($form == -1) {
            ${ev.isNull} = true;
          } else {
            ${ev.primitive} = $dtu.dateTrunc($dateVal, $form);
          }
        """
      })
    }
  }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1015,22 +1015,6 @@ object functions { | |
| */ | ||
| def cosh(columnName: String): Column = cosh(Column(columnName)) | ||
|
|
||
/**
 * Gives the current date as a column.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_date(): Column = Column(CurrentDate())
|
|
||
/**
 * Gives the current timestamp as a column.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_timestamp(): Column = Column(CurrentTimestamp())
|
|
||
| /** | ||
| * Computes the exponential of the given value. | ||
| * | ||
|
|
@@ -1916,6 +1900,22 @@ object functions { | |
| // DateTime functions | ||
| ////////////////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
/**
 * Gives the current date as a column.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_date(): Column = Column(CurrentDate())
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably we don't want to expose this as the DF API. |
||
|
|
||
/**
 * Gives the current timestamp as a column.
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def current_timestamp(): Column = Column(CurrentTimestamp())
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably we don't want to expose this as the DF API.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not like the case of e/pi, there were some considerations in the previous pr for this. |
||
|
|
||
| /** | ||
| * Converts a date/timestamp/string to a value of string in the format specified by the date | ||
| * format given by the second argument. | ||
|
|
@@ -2099,6 +2099,22 @@ object functions { | |
| */ | ||
| def weekofyear(columnName: String): Column = weekofyear(Column(columnName)) | ||
|
|
||
/**
 * Converts the column into a date, returning only the date part of the value.
 *
 * The conversion relies on implicit casting to `DateType`, so the input may be a date, a
 * timestamp, or a string in date or timestamp format.
 *
 * @param e column to convert
 * @return a column of `DateType`
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def to_date(e: Column): Column = ToDate(e.expr)
|
|
||
/**
 * Truncates a date to the unit named by `format`.
 *
 * @param date column holding the dates to truncate
 * @param format column holding the name of the unit to truncate to; a format that is not
 *               recognised yields null
 * @return a column of `DateType`
 *
 * @group datetime_funcs
 * @since 1.5.0
 */
def trunc(date: Column, format: Column): Column = Column(Trunc(date.expr, format.expr))
|
|
||
| ////////////////////////////////////////////////////////////////////////////////////////////// | ||
| // Collection functions | ||
| ////////////////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably only accept the `DateType`, `TimestampType` and the `StringType`; should be `Seq(TypeCollection(DateType, TimestampType, StringType))`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TimestampType, StringType should all be converted into DateType.
Actually, I can add a rule in optimizer to delete this node after we have done implicit cast.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean we don't accept the `IntegerType`, `LongType`, do we? Hence we use the `TypeCollection`, and we'd better inherit from the `ExpectInputTypes`, instead of the `ImplicitCastInputTypes`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
int and long could not be cast into datetype.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems Hive only supports date, string or timestamp, but not int or long.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if the cast does not work, we will get an exception, that's expected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, seems we can remove this expression in optimization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we removed this expression during optimizing, then we can remove the code gen stuff, too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment saying that this relies on implicit casting (from StringType and TimestampType)?