Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions python/pyspark/sql/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,17 +520,20 @@ def func(iterator):
raise TypeError("path can be only string, list or RDD")

@since(1.5)
def orc(self, path, recursiveFileLookup=None):
def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
"""Loads ORC files, returning the result as a :class:`DataFrame`.

:param mergeSchema: sets whether we should merge schemas collected from all
ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
The default value is specified in ``spark.sql.orc.mergeSchema``.
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
disables `partition discovery`_.

>>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
>>> df.dtypes
[('a', 'bigint'), ('b', 'int'), ('c', 'int')]
"""
self._set_opts(recursiveFileLookup=recursiveFileLookup)
self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
if isinstance(path, basestring):
path = [path]
return self._df(self._jreader.orc(_to_seq(self._spark._sc, path)))
Expand Down
9 changes: 6 additions & 3 deletions python/pyspark/sql/streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,21 +514,24 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
raise TypeError("path can be only a single string")

@since(2.3)
def orc(self, path, recursiveFileLookup=None):
def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
"""Loads an ORC file stream, returning the result as a :class:`DataFrame`.

.. note:: Evolving.

:param mergeSchema: sets whether we should merge schemas collected from all
ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
The default value is specified in ``spark.sql.orc.mergeSchema``.
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
disables `partition discovery`_.

>>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp())
>>> orc_sdf.isStreaming
True
>>> orc_sdf.schema == sdf_schema
True
"""
self._set_opts(recursiveFileLookup=recursiveFileLookup)
self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
if isinstance(path, basestring):
return self._df(self._jreader.orc(path))
else:
Expand Down