apache · nchammas · Dec 4, 2019
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
@@ -520,17 +520,20 @@ def func(iterator):
             raise TypeError("path can be only string, list or RDD")
 
     @since(1.5)
-    def orc(self, path, recursiveFileLookup=None):
+    def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
         """Loads ORC files, returning the result as a :class:`DataFrame`.
 
+        :param mergeSchema: sets whether to merge the schemas collected from all ORC
+            part-files. If set, it overrides ``spark.sql.orc.mergeSchema``.
+            If None, the value of ``spark.sql.orc.mergeSchema`` is used.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
-                                    disables `partition discovery`_.
+            disables `partition discovery`_.
 
         >>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
         >>> df.dtypes
         [('a', 'bigint'), ('b', 'int'), ('c', 'int')]
         """
-        self._set_opts(recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             path = [path]
         return self._df(self._jreader.orc(_to_seq(self._spark._sc, path)))

diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
@@ -514,21 +514,24 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
             raise TypeError("path can be only a single string")
 
     @since(2.3)
-    def orc(self, path, recursiveFileLookup=None):
+    def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
         """Loads a ORC file stream, returning the result as a :class:`DataFrame`.
 
         .. note:: Evolving.
 
+        :param mergeSchema: sets whether to merge the schemas collected from all ORC
+            part-files. If set, it overrides ``spark.sql.orc.mergeSchema``.
+            If None, the value of ``spark.sql.orc.mergeSchema`` is used.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
-                                    disables `partition discovery`_.
+            disables `partition discovery`_.
 
         >>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp())
         >>> orc_sdf.isStreaming
         True
         >>> orc_sdf.schema == sdf_schema
         True
         """
-        self._set_opts(recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.orc(path))
         else: