From d2344084a7104a83c629b90f8723810ce3621830 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 21:31:19 +0500
Subject: [PATCH 01/16] Add the config: spark.sql.dialect

---
 .../apache/spark/sql/internal/SQLConf.scala | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 4f3e39ad49af..c8af7201c0ef 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1605,6 +1605,24 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
+  object Dialect extends Enumeration {
+    val SPARK, POSTGRESQL = Value
+  }
+
+  val DIALECT =
+    buildConf("spark.sql.dialect")
+      .doc("The specific features of the SQL language to be adopted, which are available when " +
+        "accessing the given database. Currently, Spark supports two database dialects, `Spark` " +
+        "and `PostgreSQL`. With `PostgreSQL` dialect, Spark will: " +
+        "1. perform integral division with the / operator if both sides are integral types; " +
+        "2. accept \"true\", \"yes\", \"1\", \"false\", \"no\", \"0\", and unique prefixes as " +
+        "input and trim input for the boolean data type; " +
+        "3. support special date/timestamp values: epoch, now, today, yesterday and tomorrow.")
+      .stringConf
+      .transform(_.toUpperCase(Locale.ROOT))
+      .checkValues(Dialect.values.map(_.toString))
+      .createWithDefault(Dialect.SPARK.toString)
+
   val PREFER_INTEGRAL_DIVISION = buildConf("spark.sql.function.preferIntegralDivision")
     .internal()
     .doc("When true, will perform integral division with the / operator " +

From 676d26513c920d295bdf4a574b0b6913e8a85d04 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 21:58:09 +0500
Subject: [PATCH 02/16] Support the config in TimestampFormatter

---
 .../catalyst/util/TimestampFormatter.scala    |  8 ++++-
 .../sql/util/TimestampFormatterSuite.scala    | 36 +++++++++++--------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 5be4807083fa..b6ca7cac3e52 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit.SECONDS
 
 import DateTimeUtils.convertSpecialTimestamp
 
+import org.apache.spark.sql.internal.SQLConf
+
 sealed trait TimestampFormatter extends Serializable {
   /**
    * Parses a timestamp in a string and converts it to microseconds.
@@ -51,8 +53,12 @@ class Iso8601TimestampFormatter(
   @transient
   protected lazy val formatter = getOrCreateFormatter(pattern, locale)
 
+  private val supportSpecialValues: Boolean = {
+    SQLConf.get.getConf(SQLConf.DIALECT) == SQLConf.Dialect.POSTGRESQL.toString
+  }
+
   override def parse(s: String): Long = {
-    val specialDate = convertSpecialTimestamp(s.trim, zoneId)
+    val specialDate = if (supportSpecialValues) convertSpecialTimestamp(s.trim, zoneId) else None
     specialDate.getOrElse {
       val parsed = formatter.parse(s)
       val parsedZoneId = parsed.query(TemporalQueries.zone())
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
index 170daa6277c4..83389dc5a98c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala
@@ -137,21 +137,29 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
   }
 
   test("special timestamp values") {
-    DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
-      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
-        val zoneId = getZoneId(timeZone)
-        val formatter = TimestampFormatter(zoneId)
-        val tolerance = TimeUnit.SECONDS.toMicros(30)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+        withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+          val zoneId = getZoneId(timeZone)
+          val formatter = TimestampFormatter(zoneId)
+          val tolerance = TimeUnit.SECONDS.toMicros(30)
 
-        assert(formatter.parse("EPOCH") === 0)
-        val now = instantToMicros(LocalDateTime.now(zoneId).atZone(zoneId).toInstant)
-        formatter.parse("now") should be (now +- tolerance)
-        val today = instantToMicros(LocalDateTime.now(zoneId)
-          .`with`(LocalTime.MIDNIGHT)
-          .atZone(zoneId).toInstant)
-        formatter.parse("yesterday CET") should be (today - MICROS_PER_DAY +- tolerance)
-        formatter.parse(" TODAY ") should be (today +- tolerance)
-        formatter.parse("Tomorrow ") should be (today + MICROS_PER_DAY +- tolerance)
+          assert(formatter.parse("EPOCH") === 0)
+          val now = instantToMicros(LocalDateTime.now(zoneId).atZone(zoneId).toInstant)
+          formatter.parse("now") should be(now +- tolerance)
+          val today = instantToMicros(LocalDateTime.now(zoneId)
+            .`with`(LocalTime.MIDNIGHT)
+            .atZone(zoneId).toInstant)
+          formatter.parse("yesterday CET") should be(today - MICROS_PER_DAY +- tolerance)
+          formatter.parse(" TODAY ") should be(today +- tolerance)
+          formatter.parse("Tomorrow ") should be(today + MICROS_PER_DAY +- tolerance)
+        }
+      }
+    }
+
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.SPARK.toString) {
+      intercept[java.time.format.DateTimeParseException] {
+        TimestampFormatter(ZoneOffset.UTC).parse("EPOCH")
       }
     }
   }

From e434d3ccd669017a16de169d6e67e001ebc0b62f Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 22:02:59 +0500
Subject: [PATCH 03/16] Add isPostgreSqlDialect to SQLConf

---
 .../apache/spark/sql/catalyst/util/TimestampFormatter.scala | 4 +---
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala  | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index b6ca7cac3e52..f663822cfa2f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -53,9 +53,7 @@ class Iso8601TimestampFormatter(
   @transient
   protected lazy val formatter = getOrCreateFormatter(pattern, locale)
 
-  private val supportSpecialValues: Boolean = {
-    SQLConf.get.getConf(SQLConf.DIALECT) == SQLConf.Dialect.POSTGRESQL.toString
-  }
+  private val supportSpecialValues: Boolean = SQLConf.get.isPostgreSqlDialect
 
   override def parse(s: String): Long = {
     val specialDate = if (supportSpecialValues) convertSpecialTimestamp(s.trim, zoneId) else None
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c8af7201c0ef..44719063cf41 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2231,6 +2231,8 @@ class SQLConf extends Serializable with Logging {
 
   def utcTimestampFuncEnabled: Boolean = getConf(UTC_TIMESTAMP_FUNC_ENABLED)
 
+  def isPostgreSqlDialect: Boolean = getConf(SQLConf.DIALECT) == Dialect.POSTGRESQL.toString
+
   /**
    * Returns the [[Resolver]] for the current configuration, which can be used to determine if two
    * identifiers are equal.

From ec75350683a570fa24e9499aaa13eccb7af52c58 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 22:33:02 +0500
Subject: [PATCH 04/16] Support the config in stringToTimestamp

---
 .../spark/sql/catalyst/expressions/Cast.scala |  8 ++-
 .../sql/catalyst/parser/AstBuilder.scala      |  3 +-
 .../sql/catalyst/util/DateTimeUtils.scala     | 11 +++-
 .../expressions/HashExpressionsSuite.scala    |  2 +-
 .../catalyst/util/DateTimeUtilsSuite.scala    | 55 +++++++++++--------
 .../sql/catalyst/util/UnsafeArraySuite.scala  |  4 +-
 .../datasources/jdbc/JDBCRelation.scala       |  4 +-
 7 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index d1943f02f85e..1f6360c95b8e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -290,6 +290,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
   private lazy val dateFormatter = DateFormatter()
   private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId)
   private val failOnIntegralTypeOverflow = SQLConf.get.failOnIntegralTypeOverflow
+  private val supportSpecialValues = SQLConf.get.isPostgreSqlDialect
 
   // UDFToString
   private[this] def castToString(from: DataType): Any => Any = from match {
@@ -423,8 +424,8 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
 
   // TimestampConverter
   private[this] def castToTimestamp(from: DataType): Any => Any = from match {
-    case StringType =>
-      buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull)
+    case StringType => buildCast[UTF8String](_, utfs =>
+      DateTimeUtils.stringToTimestamp(utfs, zoneId, supportSpecialValues).orNull)
     case BooleanType =>
       buildCast[Boolean](_, b => if (b) 1L else 0)
     case LongType =>
@@ -1174,11 +1175,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       val zid = JavaCode.global(
         ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
         zoneIdClass)
+      val sv = ctx.addReferenceObj("supportSpecialValues", supportSpecialValues)
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
         code"""
          scala.Option<Long> $longOpt =
-           org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid);
+           org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid, $sv);
          if ($longOpt.isDefined()) {
            $evPrim = ((Long) $longOpt.get()).longValue();
          } else {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 9335be5b239b..5a0d9e4e643a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1737,7 +1737,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       case "DATE" => toLiteral(stringToDate, DateType)
       case "TIMESTAMP" =>
         val zoneId = getZoneId(SQLConf.get.sessionLocalTimeZone)
-        toLiteral(stringToTimestamp(_, zoneId), TimestampType)
+        val supportSpecialValues = SQLConf.get.isPostgreSqlDialect
+        toLiteral(stringToTimestamp(_, zoneId, supportSpecialValues), TimestampType)
       case "INTERVAL" =>
         Literal(CalendarInterval.fromString(value), CalendarIntervalType)
       case "X" =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index a82471aae652..f3383d5a28c1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -211,7 +211,10 @@ object DateTimeUtils {
    * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m`
    * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m`
    */
-  def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = {
+  def stringToTimestamp(
+      s: UTF8String,
+      timeZoneId: ZoneId,
+      supportSpecialValues: Boolean): Option[SQLTimestamp] = {
     if (s == null) {
       return None
     }
@@ -220,8 +223,10 @@ object DateTimeUtils {
     var i = 0
     var currentSegmentValue = 0
     val bytes = s.trim.getBytes
-    val specialTimestamp = convertSpecialTimestamp(bytes, timeZoneId)
-    if (specialTimestamp.isDefined) return specialTimestamp
+    if (supportSpecialValues) {
+      val specialTimestamp = convertSpecialTimestamp(bytes, timeZoneId)
+      if (specialTimestamp.isDefined) return specialTimestamp
+    }
     var j = 0
     var digitsMilli = 0
     var justTime = false
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index b5cfaf8f4b0f..2ec5221fb642 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -210,7 +210,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       expected: Long,
       zoneId: ZoneId = ZoneOffset.UTC): Unit = {
     checkHiveHash(
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), zoneId).get,
+      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), zoneId, false).get,
       TimestampType,
       expected)
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 31fefd613f9c..ad8f3c5e477e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
 
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
-import java.time.{LocalDateTime, LocalTime, ZoneId}
+import java.time.{LocalDateTime, LocalTime, ZoneId, ZoneOffset}
 import java.util.{Locale, TimeZone}
 import java.util.concurrent.TimeUnit
 
@@ -39,7 +39,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId)
     def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) {
       val parsedTimestampOp = DateTimeUtils.stringToTimestamp(
-        UTF8String.fromString(originalTime), defaultZoneId)
+        UTF8String.fromString(originalTime), defaultZoneId, false)
       assert(parsedTimestampOp.isDefined, "timestamp with nanoseconds was not parsed correctly")
       assert(DateTimeUtils.timestampToString(tf, parsedTimestampOp.get) === expectedParsedTime)
     }
@@ -144,8 +144,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     assert(stringToDate(UTF8String.fromString("1999 08")).isEmpty)
   }
 
-  private def toTimestamp(str: String, zoneId: ZoneId): Option[SQLTimestamp] = {
-    stringToTimestamp(UTF8String.fromString(str), zoneId)
+  private def toTimestamp(
+      str: String,
+      zoneId: ZoneId,
+      supportSpecialValues: Boolean = false): Option[SQLTimestamp] = {
+    stringToTimestamp(UTF8String.fromString(str), zoneId, supportSpecialValues)
   }
 
   test("string to timestamp") {
@@ -274,9 +277,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
 
     // Test stringToTimestamp
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId).isEmpty)
+      UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId, false).isEmpty)
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId).isEmpty)
+      UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId, false).isEmpty)
     assert(toTimestamp("2015-02-29", defaultZoneId).isEmpty)
     assert(toTimestamp("2015-04-31", defaultZoneId).isEmpty)
   }
@@ -467,15 +470,15 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     }
 
     val defaultInputTS = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId)
+      UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId, false)
     val defaultInputTS1 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId)
+      UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, false)
     val defaultInputTS2 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId)
+      UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, false)
     val defaultInputTS3 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId)
+      UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, false)
    val defaultInputTS4 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId)
+      UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, false)
 
     testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get)
     testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get)
@@ -500,17 +503,17 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     for (tz <- ALL_TIMEZONES) {
       withDefaultTimeZone(tz) {
         val inputTS = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId)
+          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId, false)
         val inputTS1 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId)
+          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, false)
         val inputTS2 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId)
+          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, false)
         val inputTS3 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId)
+          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, false)
         val inputTS4 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId)
+          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, false)
         val inputTS5 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("1999-03-29T01:02:03.456789"), defaultZoneId)
+          UTF8String.fromString("1999-03-29T01:02:03.456789"), defaultZoneId, false)
 
         testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz)
         testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz)
@@ -574,16 +577,24 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
 
     DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
       val tolerance = TimeUnit.SECONDS.toMicros(30)
 
-      assert(toTimestamp("Epoch", zoneId).get === 0)
+      assert(toTimestamp("Epoch", zoneId, true).get === 0)
       val now = instantToMicros(LocalDateTime.now(zoneId).atZone(zoneId).toInstant)
-      toTimestamp("NOW", zoneId).get should be (now +- tolerance)
+      toTimestamp("NOW", zoneId, true).get should be (now +- tolerance)
       assert(toTimestamp("now UTC", zoneId) === None)
       val today = instantToMicros(LocalDateTime.now(zoneId)
        .`with`(LocalTime.MIDNIGHT)
        .atZone(zoneId).toInstant)
-      toTimestamp(" Yesterday", zoneId).get should be (today - MICROS_PER_DAY +- tolerance)
-      toTimestamp("Today ", zoneId).get should be (today +- tolerance)
-      toTimestamp(" tomorrow CET ", zoneId).get should be (today + MICROS_PER_DAY +- tolerance)
+      toTimestamp(" Yesterday", zoneId, true).get should be (today - MICROS_PER_DAY +- tolerance)
+      toTimestamp("Today ", zoneId, true).get should be (today +- tolerance)
+      toTimestamp(" tomorrow CET ", zoneId, true).get should be
+        (today + MICROS_PER_DAY +- tolerance)
+
+      // It must return None when support of special values is disabled
+      assert(toTimestamp("Epoch", zoneId, false) === None)
+
+      // Parsing of regular timestamps must not fail when support of special values is enabled
+      assert(toTimestamp("2019-09-18 22:26:30Z", ZoneOffset.UTC).get ===
+        date(2019, 9, 18, 22, 26, 30))
     }
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
index 0b9e023b0b45..9f9c78e75766 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
@@ -43,9 +43,9 @@ class UnsafeArraySuite extends SparkFunSuite {
   private def defaultZoneId = ZoneId.systemDefault()
   val timestampArray = Array(
     DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("1970-1-1 00:00:00"), defaultZoneId).get,
+      UTF8String.fromString("1970-1-1 00:00:00"), defaultZoneId, false).get,
     DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2016-7-26 00:00:00"), defaultZoneId).get)
+      UTF8String.fromString("2016-7-26 00:00:00"), defaultZoneId, false).get)
   val decimalArray4_1 = Array(
     BigDecimal("123.4").setScale(1, BigDecimal.RoundingMode.FLOOR),
     BigDecimal("567.8").setScale(1, BigDecimal.RoundingMode.FLOOR))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 3cd5cb164792..0cb1cf4f8d1c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -187,7 +187,9 @@ private[sql] object JDBCRelation extends Logging {
 
     columnType match {
       case _: NumericType => value.toLong
       case DateType => parse(stringToDate).toLong
-      case TimestampType => parse(stringToTimestamp(_, getZoneId(timeZoneId)))
+      case TimestampType => parse(stringToTimestamp(_,
+        getZoneId(timeZoneId),
+        supportSpecialValues = SQLConf.get.isPostgreSqlDialect))
     }
   }

From 3cd837f99c956242a344b4d27f9dafadd8b34756 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 22:38:15 +0500
Subject: [PATCH 05/16] Fix Json/CsvFunctionsSuite

---
 .../org/apache/spark/sql/CsvFunctionsSuite.scala  | 12 +++++++-----
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 12 +++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 1094d7d23e5e..60d67b51193e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -184,11 +184,13 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
   }
 
   test("special timestamp values") {
-    Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
-      val input = Seq(specialValue).toDS()
-      val readback = input.select(from_csv($"value", lit("t timestamp"),
-        Map.empty[String, String].asJava)).collect()
-      assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
+        val input = Seq(specialValue).toDS()
+        val readback = input.select(from_csv($"value", lit("t timestamp"),
+          Map.empty[String, String].asJava)).collect()
+        assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0)
+      }
     }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index c61c8109ee8e..92df05929de4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -611,11 +611,13 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
   }
 
   test("special timestamp values") {
-    Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
-      val input = Seq(s"""{"t": "$specialValue"}""").toDS()
-      val readback = input.select(from_json($"value", lit("t timestamp"),
-        Map.empty[String, String].asJava)).collect()
-      assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
+        val input = Seq(s"""{"t": "$specialValue"}""").toDS()
+        val readback = input.select(from_json($"value", lit("t timestamp"),
+          Map.empty[String, String].asJava)).collect()
+        assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0)
+      }
     }
   }
 }

From 2d479060b317dbbfe79c2eb22a21ed1516872831 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Wed, 18 Sep 2019 23:03:38 +0500
Subject: [PATCH 06/16] Enable the PostgreSQL dialect for PostgreSQL tests

---
 .../src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index cc92d6556387..12cbcedde558 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -315,6 +315,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
         // TODO: remove this after SPARK-29122 is resolved.
         localSparkSession.sparkContext.setLocalProperty(
           SQLConf.FAIL_ON_INTEGRAL_TYPE_OVERFLOW.key, "true")
+        localSparkSession.conf.set(SQLConf.DIALECT.key, SQLConf.Dialect.POSTGRESQL.toString)
       case _ =>
     }
 

From bbdd65f4e2fda2071dac0cb76e1afa71519c74e3 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Sun, 22 Sep 2019 20:59:55 +0500
Subject: [PATCH 07/16] Support the flag in DateFormatter

---
 .../sql/catalyst/util/DateFormatter.scala     |  4 ++-
 .../util/DateTimeFormatterHelper.scala        |  3 ++
 .../catalyst/util/TimestampFormatter.scala    |  8 +++---
 .../spark/sql/util/DateFormatterSuite.scala   | 28 ++++++++++++-------
 4 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 7f982b019c8d..72e18eb80afe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -36,7 +36,9 @@ class Iso8601DateFormatter(
   private lazy val formatter = getOrCreateFormatter(pattern, locale)
 
   override def parse(s: String): Int = {
-    val specialDate = convertSpecialDate(s.trim, zoneId)
+    val specialDate = if (supportSpecialValues) {
+      convertSpecialDate(s.trim, zoneId)
+    } else None
     specialDate.getOrElse {
       val localDate = LocalDate.parse(s, formatter)
       localDateToDays(localDate)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index a7b6309baf61..a9a74df3cdcc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -26,8 +26,11 @@ import java.util.Locale
 import com.google.common.cache.CacheBuilder
 
 import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
+import org.apache.spark.sql.internal.SQLConf
 
 trait DateTimeFormatterHelper {
+  protected val supportSpecialValues: Boolean = SQLConf.get.isPostgreSqlDialect
+
   // Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros
   // if they does not exist in the parsed object.
   protected def toZonedDateTime(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index f663822cfa2f..d016acfd1957 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -53,11 +53,11 @@ class Iso8601TimestampFormatter(
   @transient
   protected lazy val formatter = getOrCreateFormatter(pattern, locale)
 
-  private val supportSpecialValues: Boolean = SQLConf.get.isPostgreSqlDialect
-
   override def parse(s: String): Long = {
-    val specialDate = if (supportSpecialValues) convertSpecialTimestamp(s.trim, zoneId) else None
-    specialDate.getOrElse {
+    val specialTimestamp = if (supportSpecialValues) {
+      convertSpecialTimestamp(s.trim, zoneId)
+    } else None
+    specialTimestamp.getOrElse {
       val parsed = formatter.parse(s)
       val parsedZoneId = parsed.query(TemporalQueries.zone())
       val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index 291d40a9e84d..44adc59087cc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -103,17 +103,25 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("special date values") {
-    DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
-      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
-        val zoneId = getZoneId(timeZone)
-        val formatter = DateFormatter(zoneId)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone =>
+        withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timeZone) {
+          val zoneId = getZoneId(timeZone)
+          val formatter = DateFormatter(zoneId)
+
+          assert(formatter.parse("EPOCH") === 0)
+          val today = localDateToDays(LocalDate.now(zoneId))
+          assert(formatter.parse("Yesterday") === today - 1)
+          assert(formatter.parse("now") === today)
+          assert(formatter.parse("today ") === today)
+          assert(formatter.parse("tomorrow UTC") === today + 1)
+        }
+      }
+    }
 
-        assert(formatter.parse("EPOCH") === 0)
-        val today = localDateToDays(LocalDate.now(zoneId))
-        assert(formatter.parse("Yesterday") === today - 1)
-        assert(formatter.parse("now") === today)
-        assert(formatter.parse("today ") === today)
-        assert(formatter.parse("tomorrow UTC") === today + 1)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.SPARK.toString) {
+      intercept[java.time.format.DateTimeParseException] {
+        DateFormatter(ZoneOffset.UTC).parse("EPOCH")
       }
     }
   }

From f90df73b4daaf8832438ebd10e55f15419b43fd3 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Sun, 22 Sep 2019 21:27:17 +0500
Subject: [PATCH 08/16] Support the flag in stringToDate

---
 .../apache/spark/sql/catalyst/expressions/Cast.scala   |  7 ++++---
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala  |  8 ++++----
 .../spark/sql/catalyst/util/DateTimeUtils.scala        | 11 ++++++++---
 .../catalyst/expressions/HashExpressionsSuite.scala    |  2 +-
 .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala   | 10 ++++++++--
 .../spark/sql/catalyst/util/UnsafeArraySuite.scala     |  4 ++--
 .../sql/execution/datasources/jdbc/JDBCRelation.scala  |  5 +++--
 7 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 08cf58970194..c95621174498 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -469,8 +469,8 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
 
   // DateConverter
   private[this] def castToDate(from: DataType): Any => Any = from match {
-    case StringType =>
-      buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull)
+    case StringType => buildCast[UTF8String](_, s =>
+      DateTimeUtils.stringToDate(s, zoneId, supportSpecialValues).orNull)
     case TimestampType =>
       // throw valid precision more than seconds, according to Hive.
       // Timestamp.nanos is in 0 to 999,999,999, no more than a second.
@@ -1068,10 +1068,11 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case StringType =>
       val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
       val zid = getZoneId()
+      val sv = ctx.addReferenceObj("supportSpecialValues", supportSpecialValues)
      (c, evPrim, evNull) =>
        code"""
          scala.Option<Integer> $intOpt =
-           org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid);
+           org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid, $sv);
          if ($intOpt.isDefined()) {
            $evPrim = ((Integer) $intOpt.get()).intValue();
          } else {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 3719cb0bffb1..e488d2a54b74 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1732,14 +1732,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
         throw new ParseException(s"Cannot parse the $valueType value: $value", ctx)
       }
     }
+    def supportSpecialValues() = SQLConf.get.isPostgreSqlDialect
+    def zoneId() = getZoneId(SQLConf.get.sessionLocalTimeZone)
     try {
       valueType match {
         case "DATE" =>
-          toLiteral(stringToDate(_, getZoneId(SQLConf.get.sessionLocalTimeZone)), DateType)
+          toLiteral(stringToDate(_, zoneId(), supportSpecialValues()), DateType)
         case "TIMESTAMP" =>
-          val zoneId = getZoneId(SQLConf.get.sessionLocalTimeZone)
-          val supportSpecialValues = SQLConf.get.isPostgreSqlDialect
-          toLiteral(stringToTimestamp(_, zoneId, supportSpecialValues), TimestampType)
+          toLiteral(stringToTimestamp(_, zoneId(), supportSpecialValues()), TimestampType)
         case "INTERVAL" =>
           Literal(CalendarInterval.fromString(value), CalendarIntervalType)
         case "X" =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 602be33256fe..587a00b68b26 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -383,7 +383,10 @@ object DateTimeUtils {
    * `yyyy-[m]m-[d]d *`
    * `yyyy-[m]m-[d]dT*`
   */
-  def stringToDate(s: UTF8String, zoneId: ZoneId): Option[SQLDate] = {
+  def stringToDate(
+      s: UTF8String,
+      zoneId: ZoneId,
+      supportSpecialValues: Boolean): Option[SQLDate] = {
     if (s == null) {
       return None
     }
@@ -391,8 +394,10 @@ object DateTimeUtils {
     var i = 0
     var currentSegmentValue = 0
     val bytes = s.trim.getBytes
-    val specialDate = convertSpecialDate(bytes, zoneId)
-    if (specialDate.isDefined) return specialDate
+    if (supportSpecialValues) {
+      val specialDate = convertSpecialDate(bytes, zoneId)
+      if (specialDate.isDefined) return specialDate
+    }
     var j = 0
     while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) {
       val b = bytes(j)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index 7c89c7e460da..6f2008f4d1b6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -174,7 +174,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("hive-hash for date type") {
     def checkHiveHashForDateType(dateString: String, expected: Long): Unit = {
       checkHiveHash(
-        DateTimeUtils.stringToDate(UTF8String.fromString(dateString), ZoneOffset.UTC).get,
+        DateTimeUtils.stringToDate(UTF8String.fromString(dateString), ZoneOffset.UTC, false).get,
         DateType,
         expected)
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index fa7de912d22f..79064b4392f0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -120,8 +120,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     checkFromToJavaDate(new Date(df2.parse("1776-07-04 18:30:00 UTC").getTime))
   }
 
-  private def toDate(s: String, zoneId: ZoneId = ZoneOffset.UTC): Option[SQLDate] = {
-    stringToDate(UTF8String.fromString(s), zoneId)
+  private def toDate(
+      s: String,
+      zoneId: ZoneId = ZoneOffset.UTC,
+      supportSpecialValues: Boolean = true): Option[SQLDate] = {
+    stringToDate(UTF8String.fromString(s), zoneId, supportSpecialValues)
   }
 
   test("string to date") {
@@ -609,6 +612,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
       assert(toDate("now UTC", zoneId) === None) // "now" does not accept time zones
       assert(toDate("today", zoneId).get === today)
       assert(toDate("tomorrow CET ", zoneId).get === today + 1)
+
+      // It must return None when support of special values is disabled
+      assert(toDate("Epoch", zoneId, false) === None)
     }
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
index c685c148fa3b..17d77e7c1536 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
@@ -38,8 +38,8 @@ class UnsafeArraySuite extends SparkFunSuite {
   val doubleArray = Array(1.1, 2.2, 3.3)
   val stringArray = Array("1", "10", "100")
   val dateArray = Array(
-    DateTimeUtils.stringToDate(UTF8String.fromString("1970-1-1"), ZoneOffset.UTC).get,
-    DateTimeUtils.stringToDate(UTF8String.fromString("2016-7-26"), ZoneOffset.UTC).get)
+    DateTimeUtils.stringToDate(UTF8String.fromString("1970-1-1"), ZoneOffset.UTC, true).get,
+    DateTimeUtils.stringToDate(UTF8String.fromString("2016-7-26"), ZoneOffset.UTC, true).get)
   private def defaultZoneId = ZoneId.systemDefault()
   val timestampArray = Array(
     DateTimeUtils.stringToTimestamp(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
index 2ccf991e6e56..92c2f665e6f3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala
@@ -184,12 +184,13 @@ private[sql] object JDBCRelation extends Logging {
           s"Cannot parse the bound value $value as ${columnType.catalogString}")
       }
     }
+    def supportSpecialValues(): Boolean = SQLConf.get.isPostgreSqlDialect
 
     columnType match {
       case _: NumericType => value.toLong
-      case DateType => parse(stringToDate(_, getZoneId(timeZoneId))).toLong
+      case DateType => parse(stringToDate(_, getZoneId(timeZoneId), supportSpecialValues())).toLong
       case TimestampType => parse(stringToTimestamp(_,
         getZoneId(timeZoneId),
-        supportSpecialValues = SQLConf.get.isPostgreSqlDialect))
+        supportSpecialValues()))
     }
   }

From 4f35fadbba690850b8638dfaf5892c9440128adc Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Sun, 22 Sep 2019 21:28:54 +0500
Subject: [PATCH 09/16] Enable supportSpecialValues by default in DateTimeUtilsSuite

---
 .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 79064b4392f0..6b419894f9e0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -154,7 +154,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
   private def toTimestamp(
       str: String,
       zoneId: ZoneId,
-      supportSpecialValues: Boolean = false): Option[SQLTimestamp] = {
+      supportSpecialValues: Boolean = true): Option[SQLTimestamp] = {
     stringToTimestamp(UTF8String.fromString(str), zoneId, supportSpecialValues)
   }
 
@@ -596,10 +596,6 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
 
       // It must return None when support of special values is disabled
      assert(toTimestamp("Epoch", zoneId, false) === None)
-
-      // Parsing of regular timestamps must not fail when support of special values is enabled
-      assert(toTimestamp("2019-09-18 22:26:30Z", ZoneOffset.UTC).get ===
-        date(2019, 9, 18, 22, 26, 30))
     }
   }
 

From 2a86d9ca8913b057c31f8a80501a3f758159443f Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Sun, 22 Sep 2019 22:03:03 +0500
Subject: [PATCH 10/16] Enable special timestamp values by default in DateTimeUtilsSuite and UnsafeArraySuite

---
 .../catalyst/util/DateTimeUtilsSuite.scala    | 28 +++++++++----------
 .../sql/catalyst/util/UnsafeArraySuite.scala  |  4 +--
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 6b419894f9e0..6b870b59fe54 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -39,7 +39,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId)
     def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) {
       val parsedTimestampOp = DateTimeUtils.stringToTimestamp(
-        UTF8String.fromString(originalTime), defaultZoneId, false)
+        UTF8String.fromString(originalTime), defaultZoneId, true)
       assert(parsedTimestampOp.isDefined, "timestamp with nanoseconds was not parsed correctly")
       assert(DateTimeUtils.timestampToString(tf, parsedTimestampOp.get) === expectedParsedTime)
     }
@@ -282,9 +282,9 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
 
     // Test stringToTimestamp
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId, false).isEmpty)
+      UTF8String.fromString("2015-02-29 00:00:00"), defaultZoneId, true).isEmpty)
     assert(stringToTimestamp(
-      UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId, false).isEmpty)
+      UTF8String.fromString("2015-04-31 00:00:00"), defaultZoneId, true).isEmpty)
     assert(toTimestamp("2015-02-29", defaultZoneId).isEmpty)
     assert(toTimestamp("2015-04-31", defaultZoneId).isEmpty)
   }
@@ -475,15 +475,15 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     }
 
     val defaultInputTS = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId, false)
+      UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId, true)
     val defaultInputTS1 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, false)
+      UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, true)
     val defaultInputTS2 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, false)
+      UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, true)
     val defaultInputTS3 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, false)
+      UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, true)
     val defaultInputTS4 = DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, false)
+      UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, true)
 
     testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get)
     testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get)
@@ -508,17 +508,17 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers {
     for (tz <- ALL_TIMEZONES) {
       withDefaultTimeZone(tz) {
         val inputTS = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId, false)
+          UTF8String.fromString("2015-03-05T09:32:05.359"), defaultZoneId, true)
         val inputTS1 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, false)
+          UTF8String.fromString("2015-03-31T20:32:05.359"), defaultZoneId, true)
         val inputTS2 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, false)
+          UTF8String.fromString("2015-04-01T02:32:05.359"), defaultZoneId, true)
         val inputTS3 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, false)
+          UTF8String.fromString("2015-03-30T02:32:05.359"), defaultZoneId, true)
         val inputTS4 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, false)
+          UTF8String.fromString("2015-03-29T02:32:05.359"), defaultZoneId, true)
         val inputTS5 = DateTimeUtils.stringToTimestamp(
-          UTF8String.fromString("1999-03-29T01:02:03.456789"), defaultZoneId, false)
+          UTF8String.fromString("1999-03-29T01:02:03.456789"), defaultZoneId, true)
 
         testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz)
         testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
index 17d77e7c1536..8da5f996a8bc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala
@@ -43,9 +43,9 @@ class UnsafeArraySuite extends SparkFunSuite {
   private def defaultZoneId = ZoneId.systemDefault()
   val timestampArray = Array(
     DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("1970-1-1 00:00:00"), defaultZoneId, false).get,
+      UTF8String.fromString("1970-1-1 00:00:00"), defaultZoneId, true).get,
     DateTimeUtils.stringToTimestamp(
-      UTF8String.fromString("2016-7-26 00:00:00"), defaultZoneId, false).get)
+      UTF8String.fromString("2016-7-26 00:00:00"), defaultZoneId, true).get)
   val decimalArray4_1 = Array(
     BigDecimal("123.4").setScale(1, BigDecimal.RoundingMode.FLOOR),
     BigDecimal("567.8").setScale(1, BigDecimal.RoundingMode.FLOOR))

From b340f5e38374aba102c0f50191e327bcd5fc9c12 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 09:40:44 +0500
Subject: [PATCH 11/16] Fix CsvFunctionsSuite

---
 .../org/apache/spark/sql/CsvFunctionsSuite.scala | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 0489f7a2ada8..1175ec376f02 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -195,11 +195,13 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
   }
 
   test("special date values") {
-    Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
-      val input = Seq(specialValue).toDS()
-      val readback = input.select(from_csv($"value", lit("d date"),
-        Map.empty[String, String].asJava)).collect()
-      assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
+        val input = Seq(specialValue).toDS()
+        val readback = input.select(from_csv($"value", lit("d date"),
+          Map.empty[String, String].asJava)).collect()
+        assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0)
+      }
     }
   }
 }

From fd34eb6b9f4feab61512ab073b94eceb54cb6ae7 Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 09:42:41 +0500
Subject: [PATCH 12/16] Fix JsonFunctionsSuite

---
 .../org/apache/spark/sql/JsonFunctionsSuite.scala | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index a01976aa98b9..2c36187b9745 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -622,11 +622,13 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
   }
 
   test("special date values") {
-    Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
-      val input = Seq(s"""{"d": "$specialValue"}""").toDS()
-      val readback = input.select(from_json($"value", lit("d date"),
-        Map.empty[String, String].asJava)).collect()
-      assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0)
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue =>
+        val input = Seq(s"""{"d": "$specialValue"}""").toDS()
+        val readback = input.select(from_json($"value", lit("d date"),
+          Map.empty[String, String].asJava)).collect()
+        assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0)
+      }
     }
   }
 }

From ba6fc7bd2a485abce3aa5180f385601bf23543fc Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 10:07:08 +0500
Subject: [PATCH 13/16] Pass boolean literal directly in Cast

---
 .../apache/spark/sql/catalyst/expressions/Cast.scala |  2 +-
 .../spark/sql/catalyst/expressions/CastSuite.scala   | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index c95621174498..188833a3bcde 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -1183,7 +1183,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       val zid = JavaCode.global(
         ctx.addReferenceObj("zoneId", zoneId, zoneIdClass.getName),
         zoneIdClass)
-      val sv = ctx.addReferenceObj("supportSpecialValues", supportSpecialValues)
+      val sv = supportSpecialValues.toString
       val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]])
       (c, evPrim, evNull) =>
         code"""
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index ffb14e283868..a7e5b1572eb8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1200,4 +1200,14 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkEvaluation(cast(Long.MinValue - 0.9D, LongType), Long.MinValue)
     }
   }
+
+  test("cast special timestamp value") {
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
+      checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")),
+        new Timestamp(0))
+    }
+    withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.SPARK.toString) {
+      checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")), null)
+    }
+  }
 }

From ea420535ec2b1dd24e90309c2b6905f55a3ab90f Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 10:14:47 +0500
Subject: [PATCH 14/16] Pass boolean literal for date directly in Cast

---
 .../org/apache/spark/sql/catalyst/expressions/Cast.scala  | 2 +-
 .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 188833a3bcde..b6aabbc4b2bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -1068,7 +1068,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
     case StringType =>
       val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]])
       val zid = getZoneId()
-      val sv = ctx.addReferenceObj("supportSpecialValues", supportSpecialValues)
+      val sv = supportSpecialValues.toString
      (c, evPrim, evNull) =>
        code"""
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index a7e5b1572eb8..0210f8b0b5ef 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1201,13 +1201,16 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
     }
   }
 
-  test("cast special timestamp value") {
+  test("cast special timestamp and date value") {
     withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.POSTGRESQL.toString) {
       checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")),
         new Timestamp(0))
+      checkEvaluation(cast(Literal("epoch"), DateType, Option("UTC")),
+        new Date(0))
     }
     withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.SPARK.toString) {
       checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")), null)
+      checkEvaluation(cast(Literal("epoch"), DateType, Option("UTC")), null)
     }
   }
 }

From 53afcdbe02b5e0de9c4ab540c006990824b621dd Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 10:23:23 +0500
Subject: [PATCH 15/16] Enable special values in HashExpressionsSuite by default

---
 .../spark/sql/catalyst/expressions/HashExpressionsSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index 6f2008f4d1b6..4876e1dccbb4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -174,7 +174,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("hive-hash for date type") {
     def checkHiveHashForDateType(dateString: String, expected: Long): Unit = {
       checkHiveHash(
-        DateTimeUtils.stringToDate(UTF8String.fromString(dateString), ZoneOffset.UTC, false).get,
+        DateTimeUtils.stringToDate(UTF8String.fromString(dateString), ZoneOffset.UTC, true).get,
         DateType,
         expected)
     }
@@ -210,7 +210,7 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       expected: Long,
       zoneId: ZoneId = ZoneOffset.UTC): Unit = {
     checkHiveHash(
-      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), zoneId, false).get,
+      DateTimeUtils.stringToTimestamp(UTF8String.fromString(timestamp), zoneId, true).get,
       TimestampType,
       expected)
   }

From 94abaeaed5e7140a28e3784f5a7e5c89022f0eda Mon Sep 17 00:00:00 2001
From: Maxim Gekk
Date: Mon, 23 Sep 2019 17:07:58 +0500
Subject: [PATCH 16/16] Fix CastSuite for the date case

---
 .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
index 0210f8b0b5ef..470823f3f17e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala
@@ -1206,7 +1206,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
       checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")),
         new Timestamp(0))
       checkEvaluation(cast(Literal("epoch"), DateType, Option("UTC")),
-        new Date(0))
+        Date.valueOf("1970-01-01"))
     }
     withSQLConf(SQLConf.DIALECT.key -> SQLConf.Dialect.SPARK.toString) {
       checkEvaluation(cast(Literal("epoch"), TimestampType, Option("UTC")), null)
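
Postscript (illustrative only, not part of the series): after these patches, all
special date/timestamp handling is gated on spark.sql.dialect. A minimal sketch of
the resulting behavior in Scala, assuming a build with the series applied; the
DialectUsageExample object and its assertions are hypothetical, and SQLConf.get is
assumed to be writable here, as it is in the test helpers above.

import java.time.ZoneOffset

import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.unsafe.types.UTF8String

object DialectUsageExample {
  def main(args: Array[String]): Unit = {
    // Under the PostgreSQL dialect, formatters resolve special values such as "epoch".
    SQLConf.get.setConfString(SQLConf.DIALECT.key, SQLConf.Dialect.POSTGRESQL.toString)
    assert(TimestampFormatter(ZoneOffset.UTC).parse("epoch") == 0L)

    // The low-level parsers take the flag explicitly; callers derive it from the dialect.
    val special = DateTimeUtils.stringToTimestamp(
      UTF8String.fromString("epoch"), ZoneOffset.UTC, supportSpecialValues = true)
    assert(special.contains(0L))

    // With support disabled (the default Spark dialect), special values do not parse.
    val rejected = DateTimeUtils.stringToTimestamp(
      UTF8String.fromString("epoch"), ZoneOffset.UTC, supportSpecialValues = false)
    assert(rejected.isEmpty)
  }
}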