diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index fab8d62da0c1..60d26ff06755 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -173,7 +173,11 @@ class CSVOptions(
     writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceFlagInWrite)
     writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceFlagInWrite)
     writerSettings.setNullValue(nullValue)
-    writerSettings.setEmptyValue("\"\"")
+    if (nullValue == "" || quote == '\u0000') {
+      writerSettings.setEmptyValue(nullValue)
+    } else {
+      writerSettings.setEmptyValue(s"${quote}${quote}")
+    }
     writerSettings.setSkipEmptyLines(true)
     writerSettings.setQuoteAllFields(quoteAll)
     writerSettings.setQuoteEscapingEnabled(escapeQuotes)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 5a1d6679ebbd..f2ebc7d29a4d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -1357,11 +1357,54 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
       checkAnswer(computed, expected)
     }
 
-    // Keeps the old behavior where empty string us coerced to nullValue is not passed.
+
+    // Checks for new behavior where an empty string is not coerced to null when `nullValue` is
+    // set to anything but an empty string literal and quote character is changed.
+    withTempPath { path =>
+      df.write
+        .option("nullValue", "-")
+        .option("quote", "'")
+        .csv(path.getAbsolutePath)
+      val computed = spark.read
+        .option("nullValue", "-")
+        .option("quote", "'")
+        .schema(df.schema)
+        .csv(path.getAbsolutePath)
+      val expected = Seq(
+        (1, "John Doe"),
+        (2, ""),
+        (3, litNull),
+        (4, litNull)
+      ).toDF("id", "name")
+
+      checkAnswer(computed, expected)
+    }
+
+    // Keeps the old behavior where empty string is coerced to nullValue if not passed.
+    withTempPath { path =>
+      df.write
+        .csv(path.getAbsolutePath)
+      val computed = spark.read
+        .schema(df.schema)
+        .csv(path.getAbsolutePath)
+      val expected = Seq(
+        (1, "John Doe"),
+        (2, litNull),
+        (3, "-"),
+        (4, litNull)
+      ).toDF("id", "name")
+
+      checkAnswer(computed, expected)
+    }
+
+    // Keeps the old behavior where empty string is coerced to nullValue if not passed
+    // with quotes disabled.
     withTempPath { path =>
       df.write
+        .option("quote", "")
         .csv(path.getAbsolutePath)
       val computed = spark.read
+        .option("quote", "")
         .schema(df.schema)
         .csv(path.getAbsolutePath)
       val expected = Seq(