diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md
index 98d31a59ac7a..42b117bea464 100644
--- a/docs/sql-data-sources-csv.md
+++ b/docs/sql-data-sources-csv.md
@@ -110,8 +110,8 @@ Data source options of CSV can be set via:
   prefersDate
-  false
-  During schema inference (inferSchema), attempts to infer string columns that contain dates or timestamps as Date if the values satisfy the dateFormat option and failed to be parsed by the respective formatter. With a user-provided schema, attempts to parse timestamp columns as dates using dateFormat if they fail to conform to timestampFormat, in this case the parsed values will be cast to timestamp type afterwards.
+  true
+  During schema inference (inferSchema), attempts to infer string columns that contain dates as Date if the values satisfy the dateFormat option or the default date format. For columns that contain a mixture of dates and timestamps, tries to infer them as TimestampType if no timestamp format is specified; otherwise infers them as StringType.
   read
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
index 53d748989204..bdfa4ac3f0f8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala
@@ -59,6 +59,13 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
     ExprUtils.getDecimalParser(options.locale)
   }
 
+  // Date formats that can be parsed by DefaultTimestampFormatter
+  // Reference: DateTimeUtils.parseTimestampString
+  // Used to decide whether a column mixing dates and timestamps is inferred as TimestampType or
+  // StringType when no timestamp format is specified (the lenient timestamp formatter is used)
+  private val LENIENT_TS_FORMATTER_SUPPORTED_DATE_FORMATS = Set(
+    "yyyy-MM-dd", "yyyy-M-d", "yyyy-M-dd", "yyyy-MM-d", "yyyy-MM", "yyyy-M", "yyyy")
+
   /**
    * Similar to the JSON schema inference
    *     1. Infer type of each row
@@ -123,10 +130,8 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
       case LongType => tryParseLong(field)
       case _: DecimalType => tryParseDecimal(field)
       case DoubleType => tryParseDouble(field)
-      case DateType => tryParseDateTime(field)
-      case TimestampNTZType if options.prefersDate => tryParseDateTime(field)
+      case DateType => tryParseDate(field)
       case TimestampNTZType => tryParseTimestampNTZ(field)
-      case TimestampType if options.prefersDate => tryParseDateTime(field)
       case TimestampType => tryParseTimestamp(field)
       case BooleanType => tryParseBoolean(field)
       case StringType => StringType
@@ -179,13 +184,13 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
     if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field)) {
       DoubleType
     } else if (options.prefersDate) {
-      tryParseDateTime(field)
+      tryParseDate(field)
     } else {
       tryParseTimestampNTZ(field)
     }
   }
 
-  private def tryParseDateTime(field: String): DataType = {
+  private def tryParseDate(field: String): DataType = {
     if ((allCatch opt dateFormatter.parse(field)).isDefined) {
       DateType
     } else {
@@ -233,7 +238,40 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
    * is compatible with both input data types.
    */
   private def compatibleType(t1: DataType, t2: DataType): Option[DataType] = {
-    TypeCoercion.findTightestCommonType(t1, t2).orElse(findCompatibleTypeForCSV(t1, t2))
+    (t1, t2) match {
+      case (DateType, TimestampType) | (DateType, TimestampNTZType) |
+           (TimestampNTZType, DateType) | (TimestampType, DateType) =>
+        // For a column containing a mixture of dates and timestamps, infer it as the timestamp
+        // type if its dates can be parsed as that timestamp type, otherwise infer it as
+        // StringType. This only happens when the timestamp pattern is not specified, as the
+        // default timestamp parser is very lenient and can parse date strings as well.
+        val dateFormat = options.dateFormatInRead.getOrElse(DateFormatter.defaultPattern)
+        t1 match {
+          case DateType if canParseDateAsTimestamp(dateFormat, t2) =>
+            Some(t2)
+          case TimestampType | TimestampNTZType if canParseDateAsTimestamp(dateFormat, t1) =>
+            Some(t1)
+          case _ => Some(StringType)
+        }
+      case _ => TypeCoercion.findTightestCommonType(t1, t2).orElse(findCompatibleTypeForCSV(t1, t2))
+    }
+  }
+
+  /**
+   * Returns true if strings of the given date format can be parsed as timestamps:
+   * 1. If the user provides a timestamp format, strings are parsed as timestamps using
+   * Iso8601TimestampFormatter (with strict timestamp parsing), so no date string can be parsed
+   * as a timestamp in this case.
+   * 2. Otherwise, DefaultTimestampFormatter is used to parse strings as timestamps; it is more
+   * lenient and can parse strings of some date formats as timestamps.
+   */
+  private def canParseDateAsTimestamp(dateFormat: String, tsType: DataType): Boolean = {
+    if ((tsType.isInstanceOf[TimestampType] && options.timestampFormatInRead.isEmpty) ||
+      (tsType.isInstanceOf[TimestampNTZType] && options.timestampNTZFormatInRead.isEmpty)) {
+      LENIENT_TS_FORMATTER_SUPPORTED_DATE_FORMATS.contains(dateFormat)
+    } else {
+      false
+    }
   }
 
   /**
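A minimal sketch of the inference behavior this change produces (illustrative, not part of the
patch; assumes a SparkSession `spark` and a one-column CSV file `mixed.csv` whose rows are
`1765-03-28` and `2016-01-28T20:00:00`):

    // No timestampFormat set: the lenient DefaultTimestampFormatter also accepts the default
    // date pattern "yyyy-MM-dd", so compatibleType merges the mixed column to TimestampType.
    val asTimestamps = spark.read.option("inferSchema", "true").csv("mixed.csv")

    // Explicit timestampFormat: the strict Iso8601TimestampFormatter rejects date-only rows,
    // so the mixed column falls back to StringType.
    val asStrings = spark.read
      .option("inferSchema", "true")
      .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss")
      .csv("mixed.csv")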
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 1162c2882dd7..88396c65cc07 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -149,32 +149,30 @@ class CSVOptions(
   val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)
 
   /**
-   * Infer columns with all valid date entries as date type (otherwise inferred as timestamp type)
-   * if schema inference is enabled. When being used with user-provided schema, tries to parse
-   * timestamp values as dates if the values do not conform to the timestamp formatter before
-   * falling back to the backward compatible parsing - the parsed values will be cast to timestamp
-   * afterwards.
+   * Infer columns with all valid date entries as date type (otherwise inferred as string or
+   * timestamp type) if schema inference is enabled.
    *
-   * Disabled by default for backwards compatibility and performance.
+   * Enabled by default.
    *
    * Not compatible with legacyTimeParserPolicy == LEGACY since legacy date parser will accept
-   * extra trailing characters.
+   * extra trailing characters. Thus, it is disabled when legacyTimeParserPolicy == LEGACY.
   */
  val prefersDate = {
-    val inferDateFlag = getBool("prefersDate")
-    if (inferDateFlag && SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
-      throw QueryExecutionErrors.inferDateWithLegacyTimeParserError()
+    if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+      false
+    } else {
+      getBool("prefersDate", true)
     }
-    inferDateFlag
   }
 
+  val dateFormatOption: Option[String] = parameters.get("dateFormat")
+
   // Provide a default value for dateFormatInRead when prefersDate. This ensures that the
   // Iso8601DateFormatter (with strict date parsing) is used for date inference
   val dateFormatInRead: Option[String] = if (prefersDate) {
-    Option(parameters.getOrElse("dateFormat", DateFormatter.defaultPattern))
+    Option(dateFormatOption.getOrElse(DateFormatter.defaultPattern))
   } else {
-    parameters.get("dateFormat")
+    dateFormatOption
   }
 
   val dateFormatInWrite: String = parameters.getOrElse("dateFormat", DateFormatter.defaultPattern)
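A short sketch of how the reworked options resolve (illustrative, not part of the patch;
assumes the default spark.sql.legacy.timeParserPolicy, i.e. not LEGACY):

    val opts = new CSVOptions(Map.empty[String, String], false, "UTC")
    opts.prefersDate      // true: enabled by default, and silently false under LEGACY
                          // instead of throwing as before
    opts.dateFormatInRead // Some("yyyy-MM-dd"): defaulted from DateFormatter.defaultPattern
                          // so the strict Iso8601DateFormatter is used for date inference
    opts.dateFormatOption // None: tracks whether the user actually set "dateFormat", which
                          // now drives the datetime parsing fallback in UnivocityParser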
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index 160f6beb09b9..8464e394ab5a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, OrderedFilters}
 import org.apache.spark.sql.catalyst.expressions.{Cast, EmptyRow, ExprUtils, GenericInternalRow, Literal}
 import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{daysToMicros, TimeZoneUTC}
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
 import org.apache.spark.sql.errors.QueryExecutionErrors
@@ -135,7 +134,7 @@ class UnivocityParser(
       .orElse(SQLConf.get.csvEnableDateTimeParsingFallback)
       .getOrElse {
         SQLConf.get.legacyTimeParserPolicy == SQLConf.LegacyBehaviorPolicy.LEGACY ||
-          options.dateFormatInRead.isEmpty
+          options.dateFormatOption.isEmpty
       }
 
   // Retrieve the raw record string.
@@ -238,29 +237,19 @@ class UnivocityParser(
           timestampFormatter.parse(datum)
         } catch {
           case NonFatal(e) =>
-            // There may be date type entries in timestamp column due to schema inference
-            if (options.prefersDate) {
-              daysToMicros(dateFormatter.parse(datum), options.zoneId)
-            } else {
-              // If fails to parse, then tries the way used in 2.0 and 1.x for backwards
-              // compatibility if enabled.
-              if (!enableParsingFallbackForTimestampType) {
-                throw e
-              }
-              val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
-              DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw(e))
+            // If parsing fails, try the way used in 2.0 and 1.x for backwards
+            // compatibility if enabled.
+            if (!enableParsingFallbackForTimestampType) {
+              throw e
             }
+            val str = DateTimeUtils.cleanLegacyTimestampStr(UTF8String.fromString(datum))
+            DateTimeUtils.stringToTimestamp(str, options.zoneId).getOrElse(throw(e))
         }
       }
 
     case _: TimestampNTZType => (d: String) =>
       nullSafeDatum(d, name, nullable, options) { datum =>
-        try {
-          timestampNTZFormatter.parseWithoutTimeZone(datum, false)
-        } catch {
-          case NonFatal(e) if options.prefersDate =>
-            daysToMicros(dateFormatter.parse(datum), TimeZoneUTC.toZoneId)
-        }
+        timestampNTZFormatter.parseWithoutTimeZone(datum, false)
       }
 
     case _: StringType => (d: String) =>
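With the date fallback removed from the converters, a timestamp column no longer accepts
date-only entries. A hedged sketch of the new behavior (`options` with an explicit
timestampFormat such as "dd/MM/yyyy HH:mm" is assumed, mirroring the UnivocityParserSuite
test deleted later in this diff):

    val parser = new UnivocityParser(new StructType(), options)
    val converter = parser.makeConverter("t", TimestampType)
    // Previously, under prefersDate, a date-only value was parsed with dateFormatter and
    // converted via daysToMicros; now it throws unless the legacy parsing fallback is enabled.
    converter.apply("08_09_2001")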
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
index 7066a5614ee9..8cae2400e0ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala
@@ -97,8 +97,8 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
   }
 
   test("Type arrays are merged to highest common type") {
-    val options = new CSVOptions(Map.empty[String, String], false, "UTC")
-    val inferSchema = new CSVInferSchema(options)
+    var options = new CSVOptions(Map.empty[String, String], false, "UTC")
+    var inferSchema = new CSVInferSchema(options)
 
     assert(
       inferSchema.mergeRowTypes(Array(StringType),
@@ -109,12 +109,28 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
     assert(
       inferSchema.mergeRowTypes(Array(DoubleType),
         Array(LongType)).sameElements(Array(DoubleType)))
+
+    // Can merge DateType and TimestampType into TimestampType when no timestamp format is specified
     assert(
       inferSchema.mergeRowTypes(Array(DateType),
         Array(TimestampNTZType)).sameElements(Array(TimestampNTZType)))
     assert(
       inferSchema.mergeRowTypes(Array(DateType),
         Array(TimestampType)).sameElements(Array(TimestampType)))
+
+    // Merge DateType and TimestampType into StringType when timestamp formats are specified
+    options = new CSVOptions(
+      Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss",
+        "timestampNTZFormat" -> "yyyy/MM/dd HH:mm:ss"),
+      false,
+      "UTC")
+    inferSchema = new CSVInferSchema(options)
+    assert(
+      inferSchema.mergeRowTypes(Array(DateType),
+        Array(TimestampNTZType)).sameElements(Array(StringType)))
+    assert(
+      inferSchema.mergeRowTypes(Array(DateType),
+        Array(TimestampType)).sameElements(Array(StringType)))
   }
 
   test("Null fields are handled properly when a nullValue is specified") {
@@ -201,19 +217,18 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
   test("SPARK-39469: inferring date type") {
     // "yyyy/MM/dd" format
-    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd", "prefersDate" -> "true"),
+    var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"),
       false, "UTC")
     var inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "2018/12/02") == DateType)
     // "MMM yyyy" format
-    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy", "prefersDate" -> "true"),
+    options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"),
       false, "GMT")
     inferSchema = new CSVInferSchema(options)
     assert(inferSchema.inferField(NullType, "Dec 2018") == DateType)
     // Field should strictly match date format to infer as date
     options = new CSVOptions(
-      Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
-        "prefersDate" -> "true"),
+      Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"),
       columnPruning = false,
       defaultTimeZoneId = "GMT")
     inferSchema = new CSVInferSchema(options)
@@ -221,23 +236,24 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper {
     assert(inferSchema.inferField(NullType, "2018-12-03") == DateType)
   }
 
-  test("SPARK-39469: inferring date and timestamp types in a mixed column with prefersDate=true") {
+  test("SPARK-39469: inferring the schema of columns with mixed dates and timestamps") {
     var options = new CSVOptions(
       Map("dateFormat" -> "yyyy_MM_dd", "timestampFormat" -> "yyyy|MM|dd",
-        "timestampNTZFormat" -> "yyyy/MM/dd", "prefersDate" -> "true"),
+        "timestampNTZFormat" -> "yyyy/MM/dd"),
       columnPruning = false,
       defaultTimeZoneId = "UTC")
     var inferSchema = new CSVInferSchema(options)
+
     assert(inferSchema.inferField(DateType, "2012_12_12") == DateType)
-    assert(inferSchema.inferField(DateType, "2003|01|01") == TimestampType)
+
+    // inferField should infer a column as string type if it contains a mix of dates and timestamps
+    assert(inferSchema.inferField(DateType, "2003|01|01") == StringType)
 
     // SQL configuration must be set to default to TimestampNTZ
     withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> "TIMESTAMP_NTZ") {
-      assert(inferSchema.inferField(DateType, "2003/02/05") == TimestampNTZType)
+      assert(inferSchema.inferField(DateType, "2003/02/05") == StringType)
     }
-
-    // inferField should upgrade a date field to timestamp if the typeSoFar is a timestamp
-    assert(inferSchema.inferField(TimestampNTZType, "2012_12_12") == TimestampNTZType)
-    assert(inferSchema.inferField(TimestampType, "2018_12_03") == TimestampType)
+    assert(inferSchema.inferField(TimestampNTZType, "2012_12_12") == StringType)
+    assert(inferSchema.inferField(TimestampType, "2018_12_03") == StringType)
 
     // No errors when Date and Timestamp have the same format. Inference defaults to date
     options = new CSVOptions(
"yyyy-MM-dd'T'HH:mm:ss"), columnPruning = false, defaultTimeZoneId = "GMT") inferSchema = new CSVInferSchema(options) @@ -221,23 +236,24 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { assert(inferSchema.inferField(NullType, "2018-12-03") == DateType) } - test("SPARK-39469: inferring date and timestamp types in a mixed column with prefersDate=true") { + test("SPARK-39469: inferring the schema of columns with mixing dates and timestamps properly") { var options = new CSVOptions( Map("dateFormat" -> "yyyy_MM_dd", "timestampFormat" -> "yyyy|MM|dd", - "timestampNTZFormat" -> "yyyy/MM/dd", "prefersDate" -> "true"), + "timestampNTZFormat" -> "yyyy/MM/dd"), columnPruning = false, defaultTimeZoneId = "UTC") var inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(DateType, "2012_12_12") == DateType) - assert(inferSchema.inferField(DateType, "2003|01|01") == TimestampType) + + // inferField should infer a column as string type if it contains mixing dates and timestamps + assert(inferSchema.inferField(DateType, "2003|01|01") == StringType) // SQL configuration must be set to default to TimestampNTZ withSQLConf(SQLConf.TIMESTAMP_TYPE.key -> "TIMESTAMP_NTZ") { - assert(inferSchema.inferField(DateType, "2003/02/05") == TimestampNTZType) + assert(inferSchema.inferField(DateType, "2003/02/05") == StringType) } - - // inferField should upgrade a date field to timestamp if the typeSoFar is a timestamp - assert(inferSchema.inferField(TimestampNTZType, "2012_12_12") == TimestampNTZType) - assert(inferSchema.inferField(TimestampType, "2018_12_03") == TimestampType) + assert(inferSchema.inferField(TimestampNTZType, "2012_12_12") == StringType) + assert(inferSchema.inferField(TimestampType, "2018_12_03") == StringType) // No errors when Date and Timestamp have the same format. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index a82b33fb0ee3..4091609f3008 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -36,7 +36,7 @@ import org.apache.hadoop.io.SequenceFile.CompressionType
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.logging.log4j.Level
 
-import org.apache.spark.{SparkConf, SparkException, SparkIllegalArgumentException, SparkUpgradeException, TestUtils}
+import org.apache.spark.{SparkConf, SparkException, SparkUpgradeException, TestUtils}
 import org.apache.spark.sql.{AnalysisException, Column, DataFrame, Encoders, QueryTest, Row}
 import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils}
 import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite
@@ -2819,54 +2819,107 @@ abstract class CSVSuite
     }
   }
 
-  test("SPARK-39469: Infer schema for date type") {
-    val options1 = Map(
-      "header" -> "true",
-      "inferSchema" -> "true",
-      "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss",
-      "dateFormat" -> "yyyy-MM-dd",
-      "prefersDate" -> "true")
-    val options2 = Map(
-      "header" -> "true",
-      "inferSchema" -> "true",
-      "prefersDate" -> "true")
-
-    // Error should be thrown when attempting to prefersDate with Legacy parser
-    if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
-      checkError(
-        exception = intercept[SparkIllegalArgumentException] {
-          spark.read.format("csv").options(options1).load(testFile(dateInferSchemaFile))
-        },
-        errorClass = "CANNOT_INFER_DATE")
-    } else {
-      // 1. Specify date format and timestamp format
-      // 2. Date inference should work with default date format when dateFormat is not provided
-      Seq(options1, options2).foreach {options =>
-        val results = spark.read
-          .format("csv")
-          .options(options)
-          .load(testFile(dateInferSchemaFile))
-
-        val expectedSchema = StructType(List(StructField("date", DateType),
-          StructField("timestamp-date", TimestampType),
-          StructField("date-timestamp", TimestampType)))
-        assert(results.schema == expectedSchema)
-
-        val expected =
-          Seq(
-            Seq(Date.valueOf("2001-9-8"), Timestamp.valueOf("2014-10-27 18:30:0.0"),
-              Timestamp.valueOf("1765-03-28 00:00:0.0")),
-            Seq(Date.valueOf("1941-1-2"), Timestamp.valueOf("2000-09-14 01:01:0.0"),
-              Timestamp.valueOf("1423-11-12 23:41:0.0")),
-            Seq(Date.valueOf("0293-11-7"), Timestamp.valueOf("1995-06-25 00:00:00.0"),
-              Timestamp.valueOf("2016-01-28 20:00:00.0"))
-          )
-        assert(results.collect().toSeq.map(_.toSeq) == expected)
-      }
-    }
-  }
+  test("SPARK-39469: Infer schema for columns with all dates") {
+    withTempPath { path =>
+      Seq(
+        "2001-09-08",
+        "1941-01-02",
+        "0293-11-07"
+      ).toDF()
+        .repartition(1)
+        .write.text(path.getAbsolutePath)
+
+      val options = Map(
+        "header" -> "false",
+        "inferSchema" -> "true",
+        "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss")
+
+      val df = spark.read
+        .format("csv")
+        .options(options)
+        .load(path.getAbsolutePath)
+
+      val expected = if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+        // When the legacy parser is enabled, `prefersDate` is disabled
+        Seq(
+          Row("2001-09-08"),
+          Row("1941-01-02"),
+          Row("0293-11-07")
+        )
+      } else {
+        Seq(
+          Row(Date.valueOf("2001-9-8")),
+          Row(Date.valueOf("1941-1-2")),
+          Row(Date.valueOf("0293-11-7"))
+        )
+      }
+
+      checkAnswer(df, expected)
+    }
+  }
+
+  test("SPARK-40474: Infer schema for columns with a mix of dates and timestamps") {
+    withTempPath { path =>
+      Seq(
+        "1765-03-28",
+        "1423-11-12T23:41:00",
+        "2016-01-28T20:00:00"
+      ).toDF()
+        .repartition(1)
+        .write.text(path.getAbsolutePath)
+
+      if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
+        val options = Map(
+          "header" -> "false",
+          "inferSchema" -> "true",
+          "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss")
+        val df = spark.read
+          .format("csv")
+          .options(options)
+          .load(path.getAbsolutePath)
+        val expected = Seq(
+          Row(Timestamp.valueOf("1765-03-28 00:00:00.0")),
+          Row(Timestamp.valueOf("1423-11-12 23:41:00.0")),
+          Row(Timestamp.valueOf("2016-01-28 20:00:00.0"))
+        )
+        checkAnswer(df, expected)
+      } else {
+        // When timestampFormat is specified, infer and parse the column as strings
+        val options1 = Map(
+          "header" -> "false",
+          "inferSchema" -> "true",
+          "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss")
+        val df1 = spark.read
+          .format("csv")
+          .options(options1)
+          .load(path.getAbsolutePath)
+        val expected1 = Seq(
+          Row("1765-03-28"),
+          Row("1423-11-12T23:41:00"),
+          Row("2016-01-28T20:00:00")
+        )
+        checkAnswer(df1, expected1)
+
+        // When timestampFormat is not specified, infer and parse the column as
+        // timestamp type if possible
+        val options2 = Map(
+          "header" -> "false",
+          "inferSchema" -> "true")
+        val df2 = spark.read
+          .format("csv")
+          .options(options2)
+          .load(path.getAbsolutePath)
+        val expected2 = Seq(
+          Row(Timestamp.valueOf("1765-03-28 00:00:00.0")),
+          Row(Timestamp.valueOf("1423-11-12 23:41:00.0")),
+          Row(Timestamp.valueOf("2016-01-28 20:00:00.0"))
+        )
+        checkAnswer(df2, expected2)
+      }
+    }
+  }
 
-  test("SPARK-39904: Parse incorrect timestamp values with prefersDate=true") {
+  test("SPARK-39904: Parse incorrect timestamp values") {
     withTempPath { path =>
       Seq(
         "2020-02-01 12:34:56",
@@ -2881,16 +2934,10 @@ abstract class CSVSuite
 
       val output = spark.read
         .schema(schema)
-        .option("prefersDate", "true")
         .csv(path.getAbsolutePath)
 
-      if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) {
-        checkError(
-          exception = intercept[SparkIllegalArgumentException] {
-            output.collect()
-          },
-          errorClass = "CANNOT_INFER_DATE")
-      } else {
+      if (SQLConf.get.legacyTimeParserPolicy != LegacyBehaviorPolicy.LEGACY) {
+        // When the legacy parser is enabled, `prefersDate` is disabled, so no error is expected
         checkAnswer(
           output,
           Seq(