Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/deps/spark-deps-hadoop-2.2
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.1.1.jar
univocity-parsers-2.2.1.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
Expand Down
2 changes: 1 addition & 1 deletion dev/deps/spark-deps-hadoop-2.3
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.1.1.jar
univocity-parsers-2.2.1.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
Expand Down
2 changes: 1 addition & 1 deletion dev/deps/spark-deps-hadoop-2.4
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.1.1.jar
univocity-parsers-2.2.1.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xmlenc-0.52.jar
Expand Down
2 changes: 1 addition & 1 deletion dev/deps/spark-deps-hadoop-2.6
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.1.1.jar
univocity-parsers-2.2.1.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
Expand Down
2 changes: 1 addition & 1 deletion dev/deps/spark-deps-hadoop-2.7
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
univocity-parsers-2.1.1.jar
univocity-parsers-2.2.1.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
set, it uses the default value, ``20480``.
:param maxCharsPerColumn: defines the maximum number of characters allowed for any given
value being read. If None is set, it uses the default value,
``1000000``.
``-1`` meaning unlimited length.
:param maxMalformedLogPerPartition: sets the maximum number of malformed rows Spark will
log for each partition. Malformed records beyond this
number will be ignored. If None is set, it
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/sql/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
set, it uses the default value, ``20480``.
:param maxCharsPerColumn: defines the maximum number of characters allowed for any given
value being read. If None is set, it uses the default value,
``1000000``.
``-1`` meaning unlimited length.
:param mode: allows a mode for dealing with corrupt records during parsing. If None is
set, it uses the default value, ``PERMISSIVE``.

Expand Down
2 changes: 1 addition & 1 deletion sql/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
<dependency>
<groupId>com.univocity</groupId>
<artifactId>univocity-parsers</artifactId>
<version>2.1.1</version>
<version>2.2.1</version>
<type>jar</type>
</dependency>
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
* `java.sql.Timestamp.valueOf()` and `java.sql.Date.valueOf()` or ISO 8601 format.</li>
* <li>`maxColumns` (default `20480`): defines a hard limit of how many columns
* a record can have.</li>
* <li>`maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed
* for any given value being read.</li>
* <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
* for any given value being read. By default, it is -1 meaning unlimited length</li>
* <li>`maxMalformedLogPerPartition` (default `10`): sets the maximum number of malformed rows
* Spark will log for each partition. Malformed records beyond this number will be ignored.</li>
* <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ private[csv] class CSVOptions(@transient private val parameters: Map[String, Str

val maxColumns = getInt("maxColumns", 20480)

val maxCharsPerColumn = getInt("maxCharsPerColumn", 1000000)
val maxCharsPerColumn = getInt("maxCharsPerColumn", -1)

val escapeQuotes = getBool("escapeQuotes", true)

Expand All @@ -123,8 +123,6 @@ private[csv] class CSVOptions(@transient private val parameters: Map[String, Str
val inputBufferSize = 128

val isCommentSet = this.comment != '\u0000'

val rowSeparator = "\n"
}

object CSVOptions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ private[csv] class CsvReader(params: CSVOptions) {
val settings = new CsvParserSettings()
val format = settings.getFormat
format.setDelimiter(params.delimiter)
format.setLineSeparator(params.rowSeparator)
format.setQuote(params.quote)
format.setQuoteEscape(params.escape)
format.setComment(params.comment)
Expand Down Expand Up @@ -70,7 +69,6 @@ private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
private val format = writerSettings.getFormat

format.setDelimiter(params.delimiter)
format.setLineSeparator(params.rowSeparator)
format.setQuote(params.quote)
format.setQuoteEscape(params.escape)
format.setComment(params.comment)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
* `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
* <li>`maxColumns` (default `20480`): defines a hard limit of how many columns
* a record can have.</li>
* <li>`maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed
* for any given value being read.</li>
* <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
* for any given value being read. By default, it is -1 meaning unlimited length</li>
* <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
* during parsing.
* <ul>
Expand Down