@@ -414,6 +414,7 @@ object FunctionRegistry {
expression[WeekOfYear]("weekofyear"),
expression[Year]("year"),
expression[TimeWindow]("window"),
expression[MakeDate]("make_date"),

// collection functions
expression[CreateArray]("array"),
@@ -1605,3 +1605,55 @@ private case class GetTimestamp(
override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))
}

@ExpressionDescription(
usage = "_FUNC_(year, month, day) - Create date from year, month and day fields.",
arguments = """
Arguments:
* year - the year to represent, from 1 to 9999
* month - the month-of-year to represent, from 1 (January) to 12 (December)
* day - the day-of-month to represent, from 1 to 31
""",
examples = """
Examples:
> SELECT _FUNC_(2013, 7, 15);
2013-07-15
> SELECT _FUNC_(2019, 13, 1);
NULL
> SELECT _FUNC_(2019, 7, NULL);
NULL
> SELECT _FUNC_(2019, 2, 30);
NULL
""",
since = "3.0.0")
case class MakeDate(year: Expression, month: Expression, day: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

override def children: Seq[Expression] = Seq(year, month, day)
override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType)
override def dataType: DataType = DateType
override def nullable: Boolean = true

override def nullSafeEval(year: Any, month: Any, day: Any): Any = {
try {
val ld = LocalDate.of(year.asInstanceOf[Int], month.asInstanceOf[Int], day.asInstanceOf[Int])
localDateToDays(ld)
} catch {
case _: java.time.DateTimeException => null
}
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
nullSafeCodeGen(ctx, ev, (year, month, day) => {
s"""
try {
${ev.value} = $dtu.localDateToDays(java.time.LocalDate.of($year, $month, $day));
} catch (java.time.DateTimeException e) {
${ev.isNull} = true;
}"""
})
}

override def prettyName: String = "make_date"
}
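
Not part of the patch, but as a sanity check of the evaluation logic above: a minimal standalone sketch, assuming DateTimeUtils.localDateToDays is equivalent to LocalDate.toEpochDay narrowed to Int (DateType stores a date as the number of days since 1970-01-01).

import java.time.{DateTimeException, LocalDate}

// Model of MakeDate.nullSafeEval: build a LocalDate from the three int
// fields and convert it to epoch days; an invalid field combination throws
// DateTimeException, which the expression turns into SQL NULL.
def makeDate(year: Int, month: Int, day: Int): Option[Int] =
  try Some(LocalDate.of(year, month, day).toEpochDay.toInt)
  catch { case _: DateTimeException => None }

makeDate(2013, 7, 15)  // Some(15901), i.e. 2013-07-15
makeDate(2019, 2, 30)  // None -> NULL (February has no 30th day)
makeDate(2019, 13, 1)  // None -> NULL (month outside 1..12)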
@@ -918,4 +918,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}

test("creating values of DateType via make_date") {
checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15"))
checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null)
checkEvaluation(MakeDate(Literal(2019), Literal.create(null, IntegerType), Literal(19)), null)
checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal.create(null, IntegerType)), null)
checkEvaluation(MakeDate(Literal(Int.MaxValue), Literal(13), Literal(19)), null)
checkEvaluation(MakeDate(Literal(2019), Literal(13), Literal(19)), null)
checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal(32)), null)
}
}
12 changes: 6 additions & 6 deletions sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql
@@ -349,15 +349,15 @@ SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL;
-- SELECT EXTRACT(MICROSEC FROM DATE 'infinity'); -- ERROR: timestamp units "microsec" not recognized
-- SELECT EXTRACT(UNDEFINED FROM DATE 'infinity'); -- ERROR: timestamp units "undefined" not supported

-- skip test constructors
-- test constructors
-- select make_date(2013, 7, 15);
-- select make_date(-44, 3, 15);
select make_date(2013, 7, 15);
-- [SPARK-28471] Formatting dates with negative years
select make_date(-44, 3, 15);
-- select make_time(8, 20, 0.0);
-- should fail
-- select make_date(2013, 2, 30);
-- select make_date(2013, 13, 1);
-- select make_date(2013, 11, -1);
select make_date(2013, 2, 30);
select make_date(2013, 13, 1);
select make_date(2013, 11, -1);
-- select make_time(10, 55, 100.1);
-- select make_time(24, 0, 2.1);

46 changes: 43 additions & 3 deletions sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 48
-- Number of queries: 53


-- !query 0
Expand Down Expand Up @@ -508,8 +508,48 @@ struct<Days From 2K:int>


-- !query 47
DROP TABLE DATE_TBL
select make_date(2013, 7, 15)
-- !query 47 schema
struct<>
struct<make_date(2013, 7, 15):date>
-- !query 47 output
2013-07-15


-- !query 48
select make_date(-44, 3, 15)
-- !query 48 schema
struct<make_date(-44, 3, 15):date>
-- !query 48 output
0045-03-15

Member Author:
The year -44 is out of the valid range according to the SQL standard. We get 45 instead of -44 when converting to java.sql.Date. If you switch to the Java 8 API for dates/timestamps:

scala> spark.conf.set("spark.sql.datetime.java8API.enabled", true)

scala> spark.sql("select make_date(-44, 3, 15)").collect
res7: Array[org.apache.spark.sql.Row] = Array([-0044-03-15])

the returned instance of java.time.LocalDate seems reasonable.

Member:
You need to file a JIRA issue for this difference in make_date input range checking. Also, please add the JIRA ID to the date.sql file.

FYI, the following is PostgreSQL output.

Member Author:
Actually, the reason for the difference here is how Spark converts the internal DateType to the external type java.sql.Date (by default) or java.time.LocalDate (when spark.sql.datetime.java8API.enabled is set to true), and how that external type is converted to a string. For example, to get the same format as PostgreSQL, you need to provide an appropriate formatter.

For java.sql.Date:

scala> spark.conf.set("spark.sql.datetime.java8API.enabled", false)
scala> val date = spark.sql("select make_date(-44, 3, 15)").first.getAs[java.sql.Date](0)
scala> val sdf = new java.text.SimpleDateFormat("MM-dd-yyyy G")
scala> sdf.format(date)
res18: String = 03-17-0045 BC

For Java8 java.time.LocalDate:

scala> spark.conf.set("spark.sql.datetime.java8API.enabled", true)
scala> val formatter = DateTimeFormatter.ofPattern("MM-dd-yyyy G")
scala> val localDate = spark.sql("select make_date(-44, 3, 15)").first.getAs[LocalDate](0)
scala> localDate.format(formatter)
res16: String = 03-15-0045 BC

The difference in days is due to the different calendars (Julian in the first case, proleptic Gregorian in the second).

I see PostgreSQL formats the year -44 as 44 BC, which is wrong according to ISO 8601. See https://en.wikipedia.org/wiki/Year_zero, for example:

The "basic" format for year 0 ... year 1 BC. ... hence -0001 = 2 BC.

I don't think we should implement Postgres bugs.
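
Not part of this PR, but to make the year numbering concrete: in the ISO (proleptic Gregorian) chronology used by java.time, ISO year 0 is 1 BC, so ISO year -44 is 45 BC. A small check (the era text assumes an English default locale):

import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField

val d = LocalDate.of(-44, 3, 15)
d.get(ChronoField.YEAR)         // -44 (proleptic ISO year)
d.get(ChronoField.YEAR_OF_ERA)  // 45  (year counted within the BC era)
d.format(DateTimeFormatter.ofPattern("MM-dd-yyyy G"))  // 03-15-0045 BC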

Member:
No, what I mean is we need a JIRA report and JIRA ID comment because we don't follow PostgreSQL like that.

Member:
Ideally, you can file a PostgreSQL bug and use that instead of a Spark JIRA.
Either way, we should report the difference.



-- !query 49
select make_date(2013, 2, 30)
-- !query 49 schema
struct<make_date(2013, 2, 30):date>
-- !query 49 output
NULL


-- !query 50
select make_date(2013, 13, 1)
-- !query 50 schema
struct<make_date(2013, 13, 1):date>
-- !query 50 output
NULL


-- !query 51
select make_date(2013, 11, -1)
-- !query 51 schema
struct<make_date(2013, 11, -1):date>
-- !query 51 output
NULL


-- !query 52
DROP TABLE DATE_TBL
-- !query 52 schema
struct<>
-- !query 52 output