@@ -1132,70 +1132,73 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
     assertEquals(numRecords - numRecordsToDelete, snapshotDF2.count())
   }
 
+  @ParameterizedTest
   @CsvSource(Array("avro, 6", "parquet, 6"))
   def testLogicalTypesReadRepair(logBlockFormat: String, tableVersion: Int): Unit = {
-    val logBlockString = if (logBlockFormat == "avro") {
-      ""
-    } else {
-      "_parquet_log"
-    }
-    val prevTimezone = spark.conf.get("spark.sql.session.timeZone")
-    val propertyValue: String = System.getProperty("spark.testing")
-    try {
-      if (HoodieSparkUtils.isSpark3_3) {
-        System.setProperty("spark.testing", "true")
+    if (HoodieSparkUtils.gteqSpark3_4) {
+      val logBlockString = if (logBlockFormat == "avro") {
+        ""
+      } else {
+        "_parquet_log"
       }
-      spark.conf.set("spark.sql.session.timeZone", "UTC")
-      val tableName = "trips_logical_types_json_mor_read_v" + tableVersion + logBlockString
-      val dataPath = "file://" + basePath + "/" + tableName
-      val zipOutput = Paths.get(new URI(dataPath))
-      HoodieTestUtils.extractZipToDirectory("/" + tableName + ".zip", zipOutput, getClass)
-      val tableBasePath = zipOutput.toString
-
-      val df = spark.read.format("hudi").load(tableBasePath)
-
-      val rows = df.collect()
-      assertEquals(20, rows.length)
-      for (row <- rows) {
-        val hash = row.get(6).asInstanceOf[String].hashCode()
-        if ((hash & 1)== 0) {
-          assertEquals("2020-01-01T00:00:00.001Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
-          assertEquals("2020-06-01T12:00:00.000001Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
-          assertEquals("2015-05-20T12:34:56.001", row.get(17).toString)
-          assertEquals("2017-07-07T07:07:07.000001", row.get(18).toString)
-        } else {
-          assertEquals("2019-12-31T23:59:59.999Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
-          assertEquals("2020-06-01T11:59:59.999999Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
-          assertEquals("2015-05-20T12:34:55.999", row.get(17).toString)
-          assertEquals("2017-07-07T07:07:06.999999", row.get(18).toString)
+      val prevTimezone = spark.conf.get("spark.sql.session.timeZone")
+      val propertyValue: String = System.getProperty("spark.testing")
+      try {
+        if (HoodieSparkUtils.isSpark3_3) {
+          System.setProperty("spark.testing", "true")
         }
-      }
-      assertEquals(10, df.filter("ts_millis > timestamp('2020-01-01 00:00:00Z')").count())
-      assertEquals(10, df.filter("ts_millis < timestamp('2020-01-01 00:00:00Z')").count())
-      assertEquals(0, df.filter("ts_millis > timestamp('2020-01-01 00:00:00.001Z')").count())
-      assertEquals(0, df.filter("ts_millis < timestamp('2019-12-31 23:59:59.999Z')").count())
-
-      assertEquals(10, df.filter("ts_micros > timestamp('2020-06-01 12:00:00Z')").count())
-      assertEquals(10, df.filter("ts_micros < timestamp('2020-06-01 12:00:00Z')").count())
-      assertEquals(0, df.filter("ts_micros > timestamp('2020-06-01 12:00:00.000001Z')").count())
-      assertEquals(0, df.filter("ts_micros < timestamp('2020-06-01 11:59:59.999999Z')").count())
-
-      assertEquals(10, df.filter("local_ts_millis > CAST('2015-05-20 12:34:56' AS TIMESTAMP_NTZ)").count())
-      assertEquals(10, df.filter("local_ts_millis < CAST('2015-05-20 12:34:56' AS TIMESTAMP_NTZ)").count())
-      assertEquals(0, df.filter("local_ts_millis > CAST('2015-05-20 12:34:56.001' AS TIMESTAMP_NTZ)").count())
-      assertEquals(0, df.filter("local_ts_millis < CAST('2015-05-20 12:34:55.999' AS TIMESTAMP_NTZ)").count())
-
-      assertEquals(10, df.filter("local_ts_micros > CAST('2017-07-07 07:07:07' AS TIMESTAMP_NTZ)").count())
-      assertEquals(10, df.filter("local_ts_micros < CAST('2017-07-07 07:07:07' AS TIMESTAMP_NTZ)").count())
-      assertEquals(0, df.filter("local_ts_micros > CAST('2017-07-07 07:07:07.000001' AS TIMESTAMP_NTZ)").count())
-      assertEquals(0, df.filter("local_ts_micros < CAST('2017-07-07 07:07:06.999999' AS TIMESTAMP_NTZ)").count())
-    } finally {
-      spark.conf.set("spark.sql.session.timeZone", prevTimezone)
-      if (HoodieSparkUtils.isSpark3_3) {
-        if (propertyValue == null) {
-          System.clearProperty("spark.testing")
-        } else {
-          System.setProperty("spark.testing", propertyValue)
+        spark.conf.set("spark.sql.session.timeZone", "UTC")
+        val tableName = "trips_logical_types_json_mor_read_v" + tableVersion + logBlockString
+        val dataPath = "file://" + basePath + "/" + tableName
+        val zipOutput = Paths.get(new URI(dataPath))
+        HoodieTestUtils.extractZipToDirectory("/" + tableName + ".zip", zipOutput, getClass)
+        val tableBasePath = zipOutput.toString
+
+        val df = spark.read.format("hudi").load(tableBasePath)
+
+        val rows = df.collect()
+        assertEquals(20, rows.length)
+        for (row <- rows) {
+          val hash = row.get(6).asInstanceOf[String].hashCode()
+          if ((hash & 1) == 0) {
+            assertEquals("2020-01-01T00:00:00.001Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
+            assertEquals("2020-06-01T12:00:00.000001Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
+            assertEquals("2015-05-20T12:34:56.001", row.get(17).toString)
+            assertEquals("2017-07-07T07:07:07.000001", row.get(18).toString)
+          } else {
+            assertEquals("2019-12-31T23:59:59.999Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
+            assertEquals("2020-06-01T11:59:59.999999Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
+            assertEquals("2015-05-20T12:34:55.999", row.get(17).toString)
+            assertEquals("2017-07-07T07:07:06.999999", row.get(18).toString)
+          }
+        }
+        assertEquals(10, df.filter("ts_millis > timestamp('2020-01-01 00:00:00Z')").count())
+        assertEquals(10, df.filter("ts_millis < timestamp('2020-01-01 00:00:00Z')").count())
+        assertEquals(0, df.filter("ts_millis > timestamp('2020-01-01 00:00:00.001Z')").count())
+        assertEquals(0, df.filter("ts_millis < timestamp('2019-12-31 23:59:59.999Z')").count())
+
+        assertEquals(10, df.filter("ts_micros > timestamp('2020-06-01 12:00:00Z')").count())
+        assertEquals(10, df.filter("ts_micros < timestamp('2020-06-01 12:00:00Z')").count())
+        assertEquals(0, df.filter("ts_micros > timestamp('2020-06-01 12:00:00.000001Z')").count())
+        assertEquals(0, df.filter("ts_micros < timestamp('2020-06-01 11:59:59.999999Z')").count())
+
+        assertEquals(10, df.filter("local_ts_millis > CAST('2015-05-20 12:34:56' AS TIMESTAMP_NTZ)").count())
+        assertEquals(10, df.filter("local_ts_millis < CAST('2015-05-20 12:34:56' AS TIMESTAMP_NTZ)").count())
+        assertEquals(0, df.filter("local_ts_millis > CAST('2015-05-20 12:34:56.001' AS TIMESTAMP_NTZ)").count())
+        assertEquals(0, df.filter("local_ts_millis < CAST('2015-05-20 12:34:55.999' AS TIMESTAMP_NTZ)").count())
+
+        assertEquals(10, df.filter("local_ts_micros > CAST('2017-07-07 07:07:07' AS TIMESTAMP_NTZ)").count())
+        assertEquals(10, df.filter("local_ts_micros < CAST('2017-07-07 07:07:07' AS TIMESTAMP_NTZ)").count())
+        assertEquals(0, df.filter("local_ts_micros > CAST('2017-07-07 07:07:07.000001' AS TIMESTAMP_NTZ)").count())
+        assertEquals(0, df.filter("local_ts_micros < CAST('2017-07-07 07:07:06.999999' AS TIMESTAMP_NTZ)").count())
+      } finally {
+        spark.conf.set("spark.sql.session.timeZone", prevTimezone)
+        if (HoodieSparkUtils.isSpark3_3) {
+          if (propertyValue == null) {
+            System.clearProperty("spark.testing")
+          } else {
+            System.setProperty("spark.testing", propertyValue)
+          }
         }
       }
     }
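For reference, the pattern this diff applies — a JUnit 5 parameterized test that snapshots mutable global configuration, overrides it for the test, and restores it in `finally` — can be sketched in isolation. This is a minimal, hypothetical illustration, not Hudi code: the class name and the stand-in config map are invented for the sketch; only the annotation usage and the save/restore shape mirror the test above.

```scala
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.CsvSource

class ConfigRestorePatternSketch {
  // Hypothetical stand-in for spark.conf / System properties.
  private val conf = scala.collection.mutable.Map(
    "spark.sql.session.timeZone" -> "America/Los_Angeles")

  // Each CsvSource row is one invocation; JUnit converts the trimmed "6" to Int.
  @ParameterizedTest
  @CsvSource(Array("avro, 6", "parquet, 6"))
  def exampleTest(logBlockFormat: String, tableVersion: Int): Unit = {
    val prevTimezone = conf("spark.sql.session.timeZone") // snapshot before mutating
    try {
      conf("spark.sql.session.timeZone") = "UTC" // force deterministic timestamp rendering
      assertEquals("UTC", conf("spark.sql.session.timeZone"))
    } finally {
      // Restore even if an assertion throws, so later tests see the original value.
      conf("spark.sql.session.timeZone") = prevTimezone
    }
  }
}
```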