From f7bc993a4ce06589c4a825cc30a0a081ebe167e6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 18 Apr 2020 02:15:06 +0800 Subject: [PATCH 1/4] [SPARK-31474][SQL] Consistency between dayofweek/dow in extract expression and dayofweek function --- .../expressions/datetimeExpressions.scala | 65 ++++++++++--------- .../sql-tests/results/date_part.sql.out | 2 +- .../sql-tests/results/extract.sql.out | 2 +- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 98536caef7be..108b0e829a64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2089,8 +2089,7 @@ object DatePart { case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source) case "WEEK" | "W" | "WEEKS" => WeekOfYear(source) case "DAY" | "D" | "DAYS" => DayOfMonth(source) - case "DAYOFWEEK" => DayOfWeek(source) - case "DOW" => Subtract(DayOfWeek(source), Literal(1)) + case "DOW" | "DAYOFWEEK" => DayOfWeek(source) case "ISODOW" => Add(WeekDay(source), Literal(1)) case "DOY" => DayOfYear(source) case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source) @@ -2105,38 +2104,42 @@ object DatePart { } } +// scalastyle:off line.size.limit @ExpressionDescription( usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp or interval source.", arguments = """ Arguments: * field - selects which part of the source should be extracted. 
- Supported string values of `field` for dates and timestamps are: - ["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"), - "CENTURY", ("CENTURIES", "C", "CENT"), - "DECADE", ("DECADES", "DEC", "DECS"), - "YEAR", ("Y", "YEARS", "YR", "YRS"), - "ISOYEAR", - "QUARTER", ("QTR"), - "MONTH", ("MON", "MONS", "MONTHS"), - "WEEK", ("W", "WEEKS"), - "DAY", ("D", "DAYS"), - "DAYOFWEEK", - "DOW", - "ISODOW", - "DOY", - "HOUR", ("H", "HOURS", "HR", "HRS"), - "MINUTE", ("M", "MIN", "MINS", "MINUTES"), - "SECOND", ("S", "SEC", "SECONDS", "SECS"), - "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"), - "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"), - "EPOCH"] - Supported string values of `field` for intervals are: - ["YEAR", ("Y", "YEARS", "YR", "YRS"), - "MONTH", ("MON", "MONS", "MONTHS"), - "DAY", ("D", "DAYS"), - "HOUR", ("H", "HOURS", "HR", "HRS"), - "MINUTE", ("M", "MIN", "MINS", "MINUTES"), - "SECOND", ("S", "SEC", "SECONDS", "SECS")] + + * source - a date/timestamp or interval column from where `field` should be extracted """, examples = """ @@ -2154,7 +2157,11 @@ object DatePart { > SELECT _FUNC_('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds); 30.001001 """, + note = """ + The _FUNC_ function is equivalent to the SQL-standard function `extract` + """, since = "3.0.0") +// scalastyle:on line.size.limit case class DatePart(field: Expression, source: Expression, child: Expression) extends RuntimeReplaceable { diff --git a/sql/core/src/test/resources/sql-tests/results/date_part.sql.out b/sql/core/src/test/resources/sql-tests/results/date_part.sql.out index 702ac17f7a4e..028448b4e3a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/date_part.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date_part.sql.out @@ -255,7 +255,7 @@ select date_part('dow', c) from t -- !query schema struct -- !query output -5 +6 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out 
b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 1f77c67871ae..171a2282df2b 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -263,7 +263,7 @@ select extract(dow from c) from t -- !query schema struct -- !query output -5 +6 -- !query From 746eedfcdd2f75d0da457fb804c53de0deaaf382 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 18 Apr 2020 11:07:46 +0800 Subject: [PATCH 2/4] regen test result --- .../sql-tests/results/postgreSQL/timestamp.sql.out | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out index 75ea3f3c4293..abfce9180ab3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out @@ -288,11 +288,11 @@ SELECT '' AS `54`, d1 as `timestamp`, -- !query schema struct<54:string,timestamp:timestamp,isoyear:int,week:int,dow:int> -- !query output - 1969-12-31 16:00:00 1970 1 3 - 1997-01-02 00:00:00 1997 1 4 - 1997-01-02 03:04:05 1997 1 4 - 1997-02-10 17:32:01 1997 7 1 - 2001-09-22 18:19:20 2001 38 6 + 1969-12-31 16:00:00 1970 1 4 + 1997-01-02 00:00:00 1997 1 5 + 1997-01-02 03:04:05 1997 1 5 + 1997-02-10 17:32:01 1997 7 2 + 2001-09-22 18:19:20 2001 38 7 -- !query From 0b9059748f0b902ab01b8eb3598f4bd36f069ecd Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 20 Apr 2020 10:58:37 +0800 Subject: [PATCH 3/4] address comments --- .../expressions/datetimeExpressions.scala | 60 +++++++++---------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 
386593c01f5e..c74105f12bdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2135,36 +2135,32 @@ object DatePartLike { arguments = """ Arguments: * field - selects which part of the source should be extracted. -
    - Supported string values of `field` for dates and timestamps are: -
  • "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia
  • -
  • "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries
  • -
  • "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 10
  • -
  • "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field
  • -
  • "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in
  • -
  • "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in
  • -
  • "MONTH", ("MON", "MONS", "MONTHS") - the month field
  • -
  • "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013
  • -
  • "DAY", ("D", "DAYS") - the day of the month field (1 - 31)
  • -
  • "DAYOFWEEK",("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7)
  • -
  • "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7)
  • -
  • "DOY" - the day of the year (1 - 365/366)
  • -
  • "HOUR", ("H", "HOURS", "HR", "HRS") - The hour field (0 - 23)
  • -
  • "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59)
  • -
  • "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts
  • -
  • "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds
  • -
  • "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - The seconds field, including fractional parts, multiplied by 1000000. Note that this includes full seconds
  • -
  • "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative)
  • -
-
    - Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are: -
  • "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12
  • -
  • "MONTH", ("MON", "MONS", "MONTHS") - the total `months` modulo 12
  • -
  • "DAY", ("D", "DAYS") - the `days` part of interval
  • -
  • "HOUR", ("H", "HOURS", "HR", "HRS") - how many hours the `microseconds` contains
  • -
  • "MINUTE", ("M", "MIN", "MINS", "MINUTES") - how many minutes left after taking hours from `microseconds`
  • -
  • "SECOND", ("S", "SEC", "SECONDS", "SECS") - how many second with fractions left after taking hours and minutes from `microseconds`
  • -
+ - Supported string values of `field` for dates and timestamps are: + - "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia + - "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries + - "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 1 + - "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field + - "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in + - "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in + - "MONTH", ("MON", "MONS", "MONTHS") - the month field + - "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013 + - "DAY", ("D", "DAYS") - the day of the month field (1 - 31) + - "DAYOFWEEK",("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7) + - "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7) + - "DOY" - the day of the year (1 - 365/366) + - "HOUR", ("H", "HOURS", "HR", "HRS") - The hour field (0 - 23) + - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59) + - "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts + - "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds + - "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - The seconds field, including fractional parts, multiplied by 1000000. 
Note that this includes full seconds + - "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative) + - Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are: + - "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12 + - "MONTH", ("MON", "MONS", "MONTHS") - the total `months` modulo 12 + - "DAY", ("D", "DAYS") - the `days` part of interval + - "HOUR", ("H", "HOURS", "HR", "HRS") - how many hours the `microseconds` contains + - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - how many minutes left after taking hours from `microseconds` + - "SECOND", ("S", "SEC", "SECONDS", "SECS") - how many second with fractions left after taking hours and minutes from `microseconds` * source - a date/timestamp or interval column from where `field` should be extracted """, examples = """ @@ -2183,7 +2179,7 @@ object DatePartLike { 30.001001 """, note = """ - The _FUNC_ function is equivalent to the SQL-standard function extract + The _FUNC_ function is equivalent to the SQL-standard function `extract` """, since = "3.0.0") // scalastyle:on line.size.limit @@ -2224,7 +2220,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression) 30.001001 """, note = """ - The _FUNC_ function is equivalent to `date_part`. See date_part for detail. + The _FUNC_ function is equivalent to `date_part`. 
""", since = "3.0.0") // scalastyle:on line.size.limit From 2cedf2ab589322a71e7dd87efb31d98b2d49f360 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 20 Apr 2020 15:17:42 +0800 Subject: [PATCH 4/4] address comments --- .../expressions/datetimeExpressions.scala | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index c74105f12bdd..9c2efae10813 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2134,33 +2134,7 @@ object DatePartLike { usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp or interval source.", arguments = """ Arguments: - * field - selects which part of the source should be extracted. - - Supported string values of `field` for dates and timestamps are: - - "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia - - "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries - - "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 1 - - "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field - - "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in - - "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in - - "MONTH", ("MON", "MONS", "MONTHS") - the month field - - "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. 
For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013 - - "DAY", ("D", "DAYS") - the day of the month field (1 - 31) - - "DAYOFWEEK",("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7) - - "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7) - - "DOY" - the day of the year (1 - 365/366) - - "HOUR", ("H", "HOURS", "HR", "HRS") - The hour field (0 - 23) - - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59) - - "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts - - "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds - - "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - The seconds field, including fractional parts, multiplied by 1000000. Note that this includes full seconds - - "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative) - - Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are: - - "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12 - - "MONTH", ("MON", "MONS", "MONTHS") - the total `months` modulo 12 - - "DAY", ("D", "DAYS") - the `days` part of interval - - "HOUR", ("H", "HOURS", "HR", "HRS") - how many hours the `microseconds` contains - - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - how many minutes left after taking hours from `microseconds` - - "SECOND", ("S", "SEC", "SECONDS", "SECS") - how many second with fractions left after taking hours and minutes from `microseconds` + * field - selects which part of the source should be extracted, and supported string values are the same as the fields of the equivalent function `EXTRACT`. 
* source - a date/timestamp or interval column from where `field` should be extracted """, examples = """ @@ -2179,7 +2153,7 @@ object DatePartLike { 30.001001 """, note = """ - The _FUNC_ function is equivalent to the SQL-standard function `extract` + The _FUNC_ function is equivalent to the SQL-standard function `EXTRACT(field FROM source)` """, since = "3.0.0") // scalastyle:on line.size.limit @@ -2200,8 +2174,33 @@ case class DatePart(field: Expression, source: Expression, child: Expression) usage = "_FUNC_(field FROM source) - Extracts a part of the date/timestamp or interval source.", arguments = """ Arguments: - * field - selects which part of the source should be extracted and supported string values - are the same with the `date_part` fields. + * field - selects which part of the source should be extracted + - Supported string values of `field` for dates and timestamps are: + - "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia + - "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries + - "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 10 + - "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field + - "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in + - "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in + - "MONTH", ("MON", "MONS", "MONTHS") - the month field (1 - 12) + - "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. 
For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013 + - "DAY", ("D", "DAYS") - the day of the month field (1 - 31) + - "DAYOFWEEK", ("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7) + - "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7) + - "DOY" - the day of the year (1 - 365/366) + - "HOUR", ("H", "HOURS", "HR", "HRS") - the hour field (0 - 23) + - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59) + - "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts + - "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds + - "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - the seconds field, including fractional parts, multiplied by 1000000. Note that this includes full seconds + - "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative) + - Supported string values of `field` for interval (which consists of `months`, `days`, `microseconds`) are: + - "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12 + - "MONTH", ("MON", "MONS", "MONTHS") - the total `months` % 12 + - "DAY", ("D", "DAYS") - the `days` part of interval + - "HOUR", ("H", "HOURS", "HR", "HRS") - how many hours the `microseconds` contains + - "MINUTE", ("M", "MIN", "MINS", "MINUTES") - how many minutes left after taking hours from `microseconds` + - "SECOND", ("S", "SEC", "SECONDS", "SECS") - how many seconds with fractions left after taking hours and minutes from `microseconds` * source - a date/timestamp or interval column from where `field` should be extracted """, examples = """ @@ -2220,7 +2219,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression) 30.001001 """, note = """ - The _FUNC_ function 
is equivalent to `date_part`. + The _FUNC_ function is equivalent to `date_part(field, source)`. """, since = "3.0.0") // scalastyle:on line.size.limit