7 changes: 5 additions & 2 deletions python/pyspark/sql/readwriter.py
@@ -633,7 +633,7 @@ def text(self, path, compression=None):

@since(2.0)
def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None,
header=None, nullValue=None, escapeQuotes=None):
header=None, nullValue=None, escapeQuotes=None, quoteAll=None):
"""Saves the content of the :class:`DataFrame` in CSV format at the specified path.

:param path: the path in any Hadoop supported file system
@@ -658,6 +658,9 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None,
:param escapeQuotes: A flag indicating whether values containing quotes should always
be enclosed in quotes. If None is set, it uses the default value
``true``, escaping all values containing a quote character.
:param quoteAll: A flag indicating whether all values should always be enclosed in
quotes. If None is set, it uses the default value ``false``,
only escaping values containing a quote character.
:param header: writes the names of columns as the first line. If None is set, it uses
the default value, ``false``.
:param nullValue: sets the string representation of a null value. If None is set, it uses
@@ -667,7 +670,7 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None,
"""
self.mode(mode)
self._set_opts(compression=compression, sep=sep, quote=quote, escape=escape, header=header,
nullValue=nullValue, escapeQuotes=escapeQuotes)
nullValue=nullValue, escapeQuotes=escapeQuotes, quoteAll=quoteAll)
self._jwrite.csv(path)

@since(1.5)
@@ -537,6 +537,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* <li>`escapeQuotes` (default `true`): a flag indicating whether values containing
* quotes should always be enclosed in quotes. Default is to escape all values containing
* a quote character.</li>
* <li>`quoteAll` (default `false`): A flag indicating whether all values should always be
* enclosed in quotes. Default is to only escape values containing a quote character.</li>
* <li>`header` (default `false`): writes the names of columns as the first line.</li>
* <li>`nullValue` (default empty string): sets the string representation of a null value.</li>
* <li>`compression` (default `null`): compression codec to use when saving to file. This can be
@@ -115,6 +115,8 @@ private[sql] class CSVOptions(@transient private val parameters: Map[String, Str

val maxMalformedLogPerPartition = getInt("maxMalformedLogPerPartition", 10)

val quoteAll = getBool("quoteAll", false)

val inputBufferSize = 128

val isCommentSet = this.comment != '\u0000'
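Taken together, the `quoteAll` option documented in `DataFrameWriter.csv` above and read here in `CSVOptions` is a writer-side flag that defaults to `false`. A minimal usage sketch in Scala follows; the SparkSession setup, the sample rows, and the `/tmp/quote-all-usage` output path are assumptions for illustration, not part of this change.

```scala
import org.apache.spark.sql.SparkSession

// Hedged usage sketch: enable the new quoteAll flag when writing CSV.
// Session setup, sample data, and the /tmp output path are illustrative assumptions.
object QuoteAllUsage {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("quoteAll-usage").getOrCreate()

    val df = spark.createDataFrame(Seq(("a \"quoted\" value", 1), ("plain", 2)))

    df.write
      .option("quote", "\"")
      .option("quoteAll", "true") // new option introduced by this patch; default is false
      .mode("overwrite")
      .csv("/tmp/quote-all-usage")

    spark.stop()
  }
}
```

The same flag is reachable from PySpark through the new `quoteAll` keyword argument shown in `readwriter.py` above.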
@@ -78,7 +78,7 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
writerSettings.setNullValue(params.nullValue)
writerSettings.setEmptyValue(params.nullValue)
writerSettings.setSkipEmptyLines(true)
writerSettings.setQuoteAllFields(false)
writerSettings.setQuoteAllFields(params.quoteAll)
writerSettings.setHeaders(headers: _*)
writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)
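Spark delegates the actual quoting to univocity's `CsvWriter`, so the effect of `setQuoteAllFields` can be seen in isolation. Below is a hedged sketch against univocity-parsers directly; the object name, the sample row, and the commented output are illustrative expectations rather than anything asserted in this patch.

```scala
import java.io.StringWriter

import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}

// Hedged sketch of what setQuoteAllFields changes at the univocity level.
object QuoteAllFieldsSketch {
  def main(args: Array[String]): Unit = {
    val settings = new CsvWriterSettings()
    settings.setQuoteAllFields(true)       // what params.quoteAll now controls
    settings.setQuoteEscapingEnabled(true) // mirrors the escapeQuotes default

    val out = new StringWriter()
    val writer = new CsvWriter(out, settings)
    writer.writeRow("test \"quote\"", "123", "plain")
    writer.close()

    // Expected to be along the lines of: "test ""quote""","123","plain"
    print(out.toString)
  }
}
```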

@@ -366,6 +366,32 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
}
}

test("save csv with quoteAll enabled") {
Review comment from @HyukjinKwon (Member), Jul 1, 2016:
It seems escapeAll and quoteAll are mixed (across the updated files)..

Reply from the author (Contributor):
Fixed :)

withTempDir { dir =>
val csvDir = new File(dir, "csv").getCanonicalPath

val data = Seq(("test \"quote\"", 123, "it \"works\"!", "\"very\" well"))
val df = spark.createDataFrame(data)

// escapeQuotes should be true by default
df.coalesce(1).write
.format("csv")
.option("quote", "\"")
.option("escape", "\"")
.option("quoteAll", "true")
.save(csvDir)

val results = spark.read
.format("text")
.load(csvDir)
.collect()

val expected = "\"test \"\"quote\"\"\",\"123\",\"it \"\"works\"\"!\",\"\"\"very\"\" well\""

assert(results.toSeq.map(_.toSeq) === Seq(Seq(expected)))
}
}
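For contrast, writing the same row with the same `quote` and `escape` options but leaving `quoteAll` at its default would be expected to quote only the fields that actually contain the quote character. This is a hedged sketch, not taken from the suite; the object name, the output path, and the commented line are assumptions.

```scala
import org.apache.spark.sql.SparkSession

// Hedged contrast sketch: same data and quote/escape options, quoteAll left at its default (false).
object NoQuoteAllSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("no-quoteAll").getOrCreate()

    val df = spark.createDataFrame(Seq(("test \"quote\"", 123, "it \"works\"!", "\"very\" well")))

    df.coalesce(1).write
      .format("csv")
      .option("quote", "\"")
      .option("escape", "\"")
      .mode("overwrite")
      .save("/tmp/no-quote-all")

    // Expected shape of the single data line (numeric field left unquoted):
    //   "test ""quote""",123,"it ""works""!","""very"" well"
    spark.stop()
  }
}
```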

test("save csv with quote escaping enabled") {
withTempDir { dir =>
val csvDir = new File(dir, "csv").getCanonicalPath