14 changes: 7 additions & 7 deletions python/pyspark/sql/readwriter.py
@@ -305,22 +305,22 @@ def table(self, tableName):
 
     @since(1.4)
     def parquet(self, *paths, **options):
-        """Loads Parquet files, returning the result as a :class:`DataFrame`.
+        """
+        Loads Parquet files, returning the result as a :class:`DataFrame`.
 
+        :param mergeSchema: sets whether we should merge schemas collected from all
+                            Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``.
+                            The default value is specified in ``spark.sql.parquet.mergeSchema``.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.
 
-        You can set the following Parquet-specific option(s) for reading Parquet files:
-            * ``mergeSchema``: sets whether we should merge schemas collected from all \
-                Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``. \
-                The default value is specified in ``spark.sql.parquet.mergeSchema``.
-
         >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
         >>> df.dtypes
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
+        mergeSchema = options.get('mergeSchema', None)
         recursiveFileLookup = options.get('recursiveFileLookup', None)
-        self._set_opts(recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
         return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))
 
     @ignore_unicode_prefix
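For reference, a minimal sketch of the batch-reader behavior this hunk documents. It is not part of the patch; the SparkSession setup and /tmp paths are illustrative assumptions.

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    # Write two part-datasets whose schemas differ but are compatible.
    # (Hypothetical paths; any writable location works.)
    spark.range(5).selectExpr("id", "id * 2 AS value") \
        .write.mode("overwrite").parquet("/tmp/merge_demo/key=1")
    spark.range(5).selectExpr("id", "id * 3 AS extra") \
        .write.mode("overwrite").parquet("/tmp/merge_demo/key=2")

    # With the new keyword, mergeSchema=True reconciles the schemas of all
    # part-files, overriding the spark.sql.parquet.mergeSchema configuration.
    df = spark.read.parquet("/tmp/merge_demo", mergeSchema=True)
    df.printSchema()  # id, value, extra, plus the discovered partition column `key`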
19 changes: 9 additions & 10 deletions python/pyspark/sql/streaming.py
@@ -535,26 +535,25 @@ def orc(self, path, recursiveFileLookup=None):
             raise TypeError("path can be only a single string")
 
     @since(2.0)
-    def parquet(self, path, recursiveFileLookup=None):
-        """Loads a Parquet file stream, returning the result as a :class:`DataFrame`.
+    def parquet(self, path, mergeSchema=None, recursiveFileLookup=None):
+        """
+        Loads a Parquet file stream, returning the result as a :class:`DataFrame`.
 
+        .. note:: Evolving.
+
+        :param mergeSchema: sets whether we should merge schemas collected from all
+                            Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``.
+                            The default value is specified in ``spark.sql.parquet.mergeSchema``.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.
 
-        You can set the following Parquet-specific option(s) for reading Parquet files:
-            * ``mergeSchema``: sets whether we should merge schemas collected from all \
-                Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``. \
-                The default value is specified in ``spark.sql.parquet.mergeSchema``.
-
-        .. note:: Evolving.
-
         >>> parquet_sdf = spark.readStream.schema(sdf_schema).parquet(tempfile.mkdtemp())
         >>> parquet_sdf.isStreaming
         True
         >>> parquet_sdf.schema == sdf_schema
         True
         """
-        self._set_opts(recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.parquet(path))
         else:
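And a minimal streaming counterpart, again not part of the patch. It assumes a local SparkSession and, as in the doctest above, uses an empty temp directory as the source; the schema is a hypothetical single-column example.

    import tempfile

    from pyspark.sql import SparkSession
    from pyspark.sql.types import LongType, StructField, StructType

    spark = SparkSession.builder.getOrCreate()

    # Streaming file sources require an explicit schema; the new keyword
    # arguments are passed straight to parquet(), mirroring the batch reader.
    schema = StructType([StructField("id", LongType())])
    sdf = spark.readStream.schema(schema).parquet(
        tempfile.mkdtemp(), mergeSchema=True, recursiveFileLookup=True)

    assert sdf.isStreaming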