From e4e4e7e29672549288afea92ac5d35910da7c51e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 22 Dec 2018 20:21:56 +0100 Subject: [PATCH 1/2] SimpleDateFormat -> DateTimeFormatter --- python/pyspark/sql/readwriter.py | 16 ++++++++-------- python/pyspark/sql/streaming.py | 8 ++++---- .../org/apache/spark/sql/DataFrameReader.scala | 12 ++++++------ .../org/apache/spark/sql/DataFrameWriter.scala | 12 ++++++------ .../spark/sql/streaming/DataStreamReader.scala | 12 ++++++------ 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 7b10512a4329..35dc10b16909 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -226,11 +226,11 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. If None is @@ -406,11 +406,11 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non :param negativeInf: sets the string representation of a negative infinity value. If None is set, it uses the default value, ``Inf``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is @@ -803,11 +803,11 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm known case-insensitive shorten names (none, bzip2, gzip, lz4, snappy and deflate). :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param encoding: specifies encoding (charset) of saved json files. If None is set, @@ -904,11 +904,11 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No :param nullValue: sets the string representation of a null value. If None is set, it uses the default value, empty string. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index fc23b9d99c34..578f9c7cb9c8 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -456,11 +456,11 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. If None is @@ -630,11 +630,11 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non :param negativeInf: sets the string representation of a negative infinity value. If None is set, it uses the default value, ``Inf``. :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at ``java.text.SimpleDateFormat``. This + follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.text.SimpleDateFormat``. + formats follow the formats at ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 9751528654ff..ce8e4c8f5b82 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -375,11 +375,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`. *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `multiLine` (default `false`): parse one record, which may span multiple lines, * per file
  • *
  • `encoding` (by default it is not set): allows to forcibly set one of standard basic @@ -585,11 +585,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  • `negativeInf` (default `-Inf`): sets the string representation of a negative infinity * value.
  • *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `maxColumns` (default `20480`): defines a hard limit of how many columns * a record can have.
  • *
  • `maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index b9c4076994e9..981b3a8fd4ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -530,11 +530,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`, * `snappy` and `deflate`).
  • *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `encoding` (by default it is not set): specifies encoding (charset) of saved json * files. If it is not set, the UTF-8 charset will be used.
  • *
  • `lineSep` (default `\n`): defines the line separator that should be used for writing.
  • @@ -649,11 +649,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`, * `snappy` and `deflate`). *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `ignoreLeadingWhiteSpace` (default `true`): a flag indicating whether or not leading * whitespaces from values being written should be skipped.
  • *
  • `ignoreTrailingWhiteSpace` (default `true`): a flag indicating defines whether or not diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 914fa90ae7e1..98589da9552c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -286,11 +286,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.
  • *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `multiLine` (default `false`): parse one record, which may span multiple lines, * per file
  • *
  • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator @@ -347,11 +347,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
  • `negativeInf` (default `-Inf`): sets the string representation of a negative infinity * value.
  • *
  • `dateFormat` (default `yyyy-MM-dd`): sets the string that indicates a date format. - * Custom date formats follow the formats at `java.text.SimpleDateFormat`. This applies to - * date type.
  • + * Custom date formats follow the formats at `java.time.format.DateTimeFormatter`. + * This applies to date type. *
  • `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that * indicates a timestamp format. Custom date formats follow the formats at - * `java.text.SimpleDateFormat`. This applies to timestamp type.
  • + * `java.time.format.DateTimeFormatter`. This applies to timestamp type. *
  • `maxColumns` (default `20480`): defines a hard limit of how many columns * a record can have.
  • *
  • `maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed From 77a1ef156eff33cfa4ca1448a2d908807ae63d6e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 22 Dec 2018 20:35:57 +0100 Subject: [PATCH 2/2] Making python style checker happy. --- python/pyspark/sql/readwriter.py | 20 ++++++++++++-------- python/pyspark/sql/streaming.py | 10 ++++++---- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 35dc10b16909..3da052391a95 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -229,8 +229,9 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. If None is @@ -409,8 +410,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is @@ -806,8 +808,9 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param encoding: specifies encoding (charset) of saved json files. If None is set, @@ -907,8 +910,9 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 578f9c7cb9c8..b981fdc4edc7 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -459,8 +459,9 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param multiLine: parse one record, which may span multiple lines, per file. If None is @@ -633,8 +634,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non follow the formats at ``java.time.format.DateTimeFormatter``. This applies to date type. If None is set, it uses the default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. Custom date - formats follow the formats at ``java.time.format.DateTimeFormatter``. + :param timestampFormat: sets the string that indicates a timestamp format. + Custom date formats follow the formats at + ``java.time.format.DateTimeFormatter``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. :param maxColumns: defines a hard limit of how many columns a record can have. If None is