Support microsecond precision during copy unload #492

Open · wants to merge 17 commits into base: master

Changes from 3 commits
@@ -710,111 +710,102 @@ class SnowflakeResultSetRDDSuite extends IntegrationSuiteBase {

test("testTimestamp") {
setupTimestampTable
-    // COPY UNLOAD can't be run because it only supports millisecond(0.001s).
-    if (!params.useCopyUnload) {
+    val result = sparkSession.sql("select * from test_table_timestamp")
Contributor

This test case failed.

Author

@Mingli-Rui Yea, sorry, haven't finished the change yet; should've mentioned this. Will push another commit this week.

Contributor

It would be great to add new test cases instead of changing the existing one, e.g. test("testTimestamp copy unload"); see the sketch below.

  1. With the new test, you can set the options below in the new test cases only:
     thisConnectorOptionsNoTable += ("timestamp_ntz_output_format" -> "YYYY-MM-DD HH24:MI:SS.FF6")
     thisConnectorOptionsNoTable += ("timestamp_ltz_output_format" -> "TZHTZM YYYY-MM-DD HH24:MI:SS.FF6")
     thisConnectorOptionsNoTable += ("timestamp_tz_output_format" -> "TZHTZM YYYY-MM-DD HH24:MI:SS.FF6")
  2. With the new test case, you can verify that the new internal parameter (suggested in another comment) can enable/disable the new behavior.
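
For illustration, a minimal sketch of such a separate test, assuming the suite's existing helpers (setupTimestampTable, testPushdown, thisConnectorOptionsNoTable) and running the new checks only when COPY UNLOAD is in use; the test name and option values follow the suggestion above:

    // Sketch only: exercise the COPY UNLOAD path with microsecond output formats.
    test("testTimestamp copy unload") {
      setupTimestampTable
      if (params.useCopyUnload) {
        // Request 6-digit fractional seconds when Snowflake unloads to CSV.
        thisConnectorOptionsNoTable += ("timestamp_ntz_output_format" -> "YYYY-MM-DD HH24:MI:SS.FF6")
        thisConnectorOptionsNoTable += ("timestamp_ltz_output_format" -> "TZHTZM YYYY-MM-DD HH24:MI:SS.FF6")
        thisConnectorOptionsNoTable += ("timestamp_tz_output_format" -> "TZHTZM YYYY-MM-DD HH24:MI:SS.FF6")

        val result = sparkSession.sql("select * from test_table_timestamp")
        testPushdown(
          s""" SELECT * FROM ( $test_table_timestamp ) AS "SF_CONNECTOR_QUERY_ALIAS" """.stripMargin,
          result,
          test_table_timestamp_rows
        )
      }
    }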


+    testPushdown(
+      s""" SELECT * FROM ( $test_table_timestamp ) AS "SF_CONNECTOR_QUERY_ALIAS" """.stripMargin,
+      result,
+      test_table_timestamp_rows
+    )
-    }
  }

// Most simple case for timestamp write
test("testTimestamp write") {
setupTimestampTable
-    // COPY UNLOAD can't be run because it only supports millisecond(0.001s).
-    if (!params.useCopyUnload) {
+    val createTableSql =
+      s"""create or replace table $test_table_write (
+         | int_c int,
+         | ts_ltz_c timestamp_ltz(9), ts_ltz_c0 timestamp_ltz(0),
+         | ts_ltz_c3 timestamp_ltz(3), ts_ltz_c6 timestamp_ltz(6),
+         |
+         | ts_ntz_c timestamp_ntz(9), ts_ntz_c0 timestamp_ntz(0),
+         | ts_ntz_c3 timestamp_ntz(3), ts_ntz_c6 timestamp_ntz(6),
+         |
+         | ts_tz_c timestamp_tz(9), ts_tz_c0 timestamp_tz(0),
+         | ts_tz_c3 timestamp_tz(3), ts_tz_c6 timestamp_tz(6)
+         | )""".stripMargin
+    writeAndCheckForOneTable(sparkSession, thisConnectorOptionsNoTable,
+      test_table_timestamp, "", test_table_write, Some(createTableSql), true)
-    }
  }

// test timestamp write with timezone
test("testTimestamp write with timezone") {
setupTimestampTable
-    // COPY UNLOAD can't be run because it only supports millisecond(0.001s).
-    if (!params.useCopyUnload) {
+    var oldValue: Option[String] = None
+    if (thisConnectorOptionsNoTable.contains("sftimezone")) {
+      oldValue = Some(thisConnectorOptionsNoTable("sftimezone"))
+      thisConnectorOptionsNoTable -= "sftimezone"
+    }
+    val oldTimezone = TimeZone.getDefault
+
+    val createTableSql =
+      s"""create or replace table $test_table_write (
+         | int_c int,
+         | ts_ltz_c timestamp_ltz(9), ts_ltz_c0 timestamp_ltz(0),
+         | ts_ltz_c3 timestamp_ltz(3), ts_ltz_c6 timestamp_ltz(6),
+         |
+         | ts_ntz_c timestamp_ntz(9), ts_ntz_c0 timestamp_ntz(0),
+         | ts_ntz_c3 timestamp_ntz(3), ts_ntz_c6 timestamp_ntz(6),
+         |
+         | ts_tz_c timestamp_tz(9), ts_tz_c0 timestamp_tz(0),
+         | ts_tz_c3 timestamp_tz(3), ts_tz_c6 timestamp_tz(6)
+         | )""".stripMargin
+
+    // Test conditions with (sfTimezone, sparkTimezone)
+    val testConditions: List[(String, String)] = List(
+      (null, "GMT")
+      , (null, "America/Los_Angeles")
+      , ("America/New_York", "America/Los_Angeles")
+    )
+
+    for ((sfTimezone, sparkTimezone) <- testConditions) {
+      // set spark timezone
+      val thisSparkSession = if (sparkTimezone != null) {
+        TimeZone.setDefault(TimeZone.getTimeZone(sparkTimezone))
+        SparkSession.builder
+          .master("local")
+          .appName("SnowflakeSourceSuite")
+          .config("spark.sql.shuffle.partitions", "6")
+          .config("spark.driver.extraJavaOptions", s"-Duser.timezone=$sparkTimezone")
+          .config("spark.executor.extraJavaOptions", s"-Duser.timezone=$sparkTimezone")
+          .config("spark.sql.session.timeZone", sparkTimezone)
+          .getOrCreate()
+      } else {
+        sparkSession
+      }
+
+      // Set timezone option
+      if (sfTimezone != null) {
+        if (thisConnectorOptionsNoTable.contains("sftimezone")) {
+          thisConnectorOptionsNoTable -= "sftimezone"
+        }
+        thisConnectorOptionsNoTable += ("sftimezone" -> sfTimezone)
+      } else {
+        if (thisConnectorOptionsNoTable.contains("sftimezone")) {
+          thisConnectorOptionsNoTable -= "sftimezone"
+        }
+      }
+
+      writeAndCheckForOneTable(thisSparkSession, thisConnectorOptionsNoTable,
+        test_table_timestamp, "", test_table_write, Some(createTableSql), true)
+    }
+
+    // restore options for further test
+    thisConnectorOptionsNoTable -= "sftimezone"
+    if (oldValue.isDefined) {
+      thisConnectorOptionsNoTable += ("sftimezone" -> oldValue.get)
+    }
+    TimeZone.setDefault(oldTimezone)
-    }
  }

test("testLargeResult") {
28 changes: 24 additions & 4 deletions src/main/scala/net/snowflake/spark/snowflake/Conversions.scala
@@ -41,15 +41,18 @@ private[snowflake] object Conversions {
  // Note - we use a pattern with timezone in the beginning, to make sure
  // parsing with PATTERN_NTZ fails for PATTERN_TZLTZ strings.
  // Note - for JDK 1.6, we use Z ipo XX for SimpleDateFormat
+ // Because SimpleDateFormat only supports milliseconds,
+ // we need to refactor this and handle the nanoseconds field separately
  private val PATTERN_TZLTZ =
    if (System.getProperty("java.version").startsWith("1.6.")) {
-     "Z yyyy-MM-dd HH:mm:ss.SSS"
+     "Z yyyy-MM-dd HH:mm:ss."
    } else {
-     "XX yyyy-MM-dd HH:mm:ss.SSS"
+     "XX yyyy-MM-dd HH:mm:ss."
    }

  // For NTZ, Snowflake serializes w/o timezone
- private val PATTERN_NTZ = "yyyy-MM-dd HH:mm:ss.SSS"
+ // and we handle the nanoseconds field separately during parsing
+ private val PATTERN_NTZ = "yyyy-MM-dd HH:mm:ss."

// For DATE, simple ISO format
private val PATTERN_DATE = "yyyy-MM-dd"
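
As a standalone illustration of why the trailing "SSS" is dropped from these patterns (not part of this PR's diff): SimpleDateFormat interprets the entire fractional field as a millisecond count, so a 6-digit fraction silently shifts the parsed instant by minutes.

    import java.text.SimpleDateFormat

    object FractionDemo extends App {
      val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS")
      // "123456" is read as 123456 milliseconds (~2 minutes), not as .123456 seconds.
      val d = fmt.parse("2014-03-01 00:00:01.123456")
      println(fmt.format(d)) // prints 2014-03-01 00:02:04.456 -- not the intended instant
    }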
@@ -193,8 +196,25 @@ private[snowflake] object Conversions {
* Parse a string exported from a Snowflake TIMESTAMP column
*/
private def parseTimestamp(s: String, isInternalRow: Boolean): Any = {
+   // Need to handle the nanoseconds field separately
Contributor

Could you please add an internal parameter to enable/disable the change, enabled by default? If the change breaks some users, they can disable the fix as a workaround.
For example, https://github.com/snowflakedb/spark-snowflake/blob/master/src/main/scala/net/snowflake/spark/snowflake/Parameters.scala#LL163C14-L163C14
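
A hypothetical sketch of what such an internal parameter might look like; the constant, option name, and accessor below are illustrative only, not the connector's actual API:

    // Hypothetical: a boolean connector option, enabled by default, that lets users
    // fall back to the old millisecond-only timestamp parsing as a workaround.
    val PARAM_INTERNAL_SUPPORT_MICROSECONDS_DURING_UNLOAD =
      "internal_support_microseconds_during_unload"

    // Assumed accessor on MergedParameters: enabled unless explicitly set to "false".
    def supportMicrosecondsDuringUnload: Boolean =
      parameters.getOrElse(PARAM_INTERNAL_SUPPORT_MICROSECONDS_DURING_UNLOAD, "true").toBoolean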

Author

Hmm, do you mean a parameter such as internal_support_micro_second_during_unload? IMO, we should always try to support microsecond-level precision, since direct JDBC supports this precision by default. It would make this part of the code confusing if we had two code paths and two sets of timestamp patterns.

Contributor

I agree users need this fix; that's why we can set the internal parameter to true by default. It's our internal policy to introduce a parameter that can disable a fix when possible. The internal parameter can be removed later, and the code will be cleaned up accordingly.

Author

Thanks for the suggestion. Will push a new commit to add this parameter.

+   // valueOf only works with yyyy-[m]m-[d]d hh:mm:ss[.f...]
+   // so we need to do a little parsing
+   val timestampRegex = """\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,9}""".r
+
+   val parsedTS = timestampRegex.findFirstMatchIn(s) match {
+     case Some(ts) => ts.toString()
+     case None => throw new IllegalArgumentException(s"Malformed timestamp $s")
+   }
+
+   val ts = java.sql.Timestamp.valueOf(parsedTS)
+   val nanoFraction = ts.getNanos
+
    val res = new Timestamp(snowflakeTimestampFormat.parse(s).getTime)
-   if (isInternalRow) DateTimeUtils.fromJavaTimestamp(res)
+   res.setNanos(nanoFraction)
+   // Since fromJavaTimestamp and Spark only support microsecond-level
+   // precision, we have to divide the nano field by 1000
+   if (isInternalRow) (DateTimeUtils.fromJavaTimestamp(res) + nanoFraction / 1000)
    else res
  }
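
For reference, a rough standalone sketch of the arithmetic above (not the connector's code): java.sql.Timestamp.valueOf keeps the full fractional part, and the microsecond epoch value Spark uses can be rebuilt from the millisecond time plus the sub-millisecond remainder of the nanos.

    import java.sql.Timestamp

    object MicrosDemo extends App {
      val s = "2014-03-01 00:00:01.123456"
      val ts = Timestamp.valueOf(s)              // getNanos == 123456000
      // The millisecond epoch value already includes the .123; add the remaining 456 micros.
      val micros = ts.getTime * 1000L + (ts.getNanos / 1000) % 1000
      println(micros % 1000000L)                 // 123456 -> microsecond precision preserved
    }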

@@ -193,4 +193,88 @@ class ConversionsSuite extends FunSuite {

assert(expect == result.toString())
}

test("Data with micro-seconds and nano-seconds precision should be correctly converted"){
Contributor (@sfc-gh-mrui, Mar 10, 2023)

This is a unit test; it proves the converter can parse timestamps with micro/nanosecond precision. Could you please add an integration test (AKA end-to-end test)?
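
A rough end-to-end sketch of what such an integration test might check, assuming helpers available elsewhere in this repo's IntegrationSuiteBase (jdbcUpdate, connectorOptionsNoTable, a $test_table name); all names and options here are illustrative, not a finished test:

    test("copy unload preserves microseconds end to end") {
      jdbcUpdate(s"create or replace table $test_table (ts timestamp_ntz(6))")
      jdbcUpdate(s"insert into $test_table values ('2014-03-01 00:00:01.123456')")

      // Read the table back through the connector with COPY UNLOAD forced on.
      val df = sparkSession.read
        .format("net.snowflake.spark.snowflake")
        .options(connectorOptionsNoTable)
        .option("dbtable", test_table)
        .option("use_copy_unload", "true")
        .load()

      val ts = df.collect().head.getTimestamp(0)
      assert(ts.getNanos == 123456000) // microseconds survive the round trip
    }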

val convertRow = Conversions.createRowConverter[Row](TestUtils.testSchema)
val doubleMin = Double.MinValue.toString
val longMax = Long.MaxValue.toString
// scalastyle:off
val unicodeString = "Unicode是樂趣"
// scalastyle:on

val timestampString = "2014-03-01 00:00:01.123456"

val expectedTimestampMicro: Timestamp = java.sql.Timestamp.valueOf(timestampString)

val dateString = "2015-07-01"
val expectedDate = TestUtils.toMillis(2015, 6, 1, 0, 0, 0)



val timestampString2 = "2014-03-01 00:00:01.123456789"

val expectedTimestampMicro2: Timestamp = java.sql.Timestamp.valueOf(timestampString2)

val dateString2 = "2015-07-01"
val expectedDate2 = TestUtils.toMillis(2015, 6, 1, 0, 0, 0)

val convertedRow = convertRow(
Array(
"1",
dateString,
"123.45",
doubleMin,
"1.0",
"42",
longMax,
"23",
unicodeString,
timestampString
)
)

val expectedRow = Row(
1.asInstanceOf[Byte],
new Date(expectedDate),
new java.math.BigDecimal("123.45"),
Double.MinValue,
1.0f,
42,
Long.MaxValue,
23.toShort,
unicodeString,
expectedTimestampMicro
)

val convertedRow2 = convertRow(
Array(
"1",
dateString2,
"123.45",
doubleMin,
"1.0",
"42",
longMax,
"23",
unicodeString,
timestampString2
)
)

val expectedRow2 = Row(
1.asInstanceOf[Byte],
new Date(expectedDate2),
new java.math.BigDecimal("123.45"),
Double.MinValue,
1.0f,
42,
Long.MaxValue,
23.toShort,
unicodeString,
expectedTimestampMicro2
)

assert(convertedRow == expectedRow)
assert(convertedRow2 == expectedRow2)
}
}