diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala index 7ce65fa89b02..b367b3d0ac64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala @@ -97,23 +97,25 @@ object CSVUtils { */ @throws[IllegalArgumentException] def toChar(str: String): Char = { - if (str.charAt(0) == '\\') { - str.charAt(1) - match { - case 't' => '\t' - case 'r' => '\r' - case 'b' => '\b' - case 'f' => '\f' - case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options - case '\'' => '\'' - case 'u' if str == """\u0000""" => '\u0000' - case _ => - throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str") - } - } else if (str.length == 1) { - str.charAt(0) - } else { - throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str") + (str: Seq[Char]) match { + case Seq() => throw new IllegalArgumentException("Delimiter cannot be empty string") + case Seq('\\') => throw new IllegalArgumentException("Single backslash is prohibited." + + " It has special meaning as beginning of an escape sequence." + + " To get the backslash character, pass a string with two backslashes as the delimiter.") + case Seq(c) => c + case Seq('\\', 't') => '\t' + case Seq('\\', 'r') => '\r' + case Seq('\\', 'b') => '\b' + case Seq('\\', 'f') => '\f' + // In case user changes quote char and uses \" as delimiter in options + case Seq('\\', '\"') => '\"' + case Seq('\\', '\'') => '\'' + case Seq('\\', '\\') => '\\' + case _ if str == """\u0000""" => '\u0000' + case Seq('\\', _) => + throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str") + case _ => + throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 5d4746cf90b3..d59035b716cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1826,4 +1826,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te val df = spark.read.option("enforceSchema", false).csv(input) checkAnswer(df, Row("1", "2")) } + + test("using the backward slash as the delimiter") { + val input = Seq("""abc\1""").toDS() + val delimiter = """\\""" + checkAnswer(spark.read.option("delimiter", delimiter).csv(input), Row("abc", "1")) + checkAnswer(spark.read.option("inferSchema", true).option("delimiter", delimiter).csv(input), + Row("abc", 1)) + val schema = new StructType().add("a", StringType).add("b", IntegerType) + checkAnswer(spark.read.schema(schema).option("delimiter", delimiter).csv(input), Row("abc", 1)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtilsSuite.scala index 221e44ce2cff..60fcbd2ff008 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtilsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtilsSuite.scala @@ -28,6 +28,7 @@ class CSVUtilsSuite extends SparkFunSuite { assert(CSVUtils.toChar("""\"""") === '\"') assert(CSVUtils.toChar("""\'""") === '\'') assert(CSVUtils.toChar("""\u0000""") === '\u0000') + assert(CSVUtils.toChar("""\\""") === '\\') } test("Does not accept delimiter larger than one character") { @@ -44,4 +45,17 @@ class CSVUtilsSuite extends SparkFunSuite { assert(exception.getMessage.contains("Unsupported special character for delimiter")) } + test("string with one backward slash is prohibited") { + val exception = intercept[IllegalArgumentException]{ + CSVUtils.toChar("""\""") + } + assert(exception.getMessage.contains("Single backslash is prohibited")) + } + + test("output proper error message for empty string") { + val exception = intercept[IllegalArgumentException]{ + CSVUtils.toChar("") + } + assert(exception.getMessage.contains("Delimiter cannot be empty string")) + } }