diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 198313b10ec1..3d3280dbd994 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -171,7 +171,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
              allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
              mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
              multiLine=None, allowUnquotedControlChars=None, lineSep=None, samplingRatio=None,
-             dropFieldIfAllNull=None, encoding=None, locale=None, recursiveFileLookup=None):
+             dropFieldIfAllNull=None, encoding=None, locale=None, pathGlobFilter=None,
+             recursiveFileLookup=None):
         """
         Loads JSON files and returns the results as a :class:`DataFrame`.
@@ -247,6 +248,9 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
         :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set,
                        it uses the default value, ``en-US``. For instance, ``locale`` is used
                        while parsing dates and timestamps.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -270,7 +274,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
             timestampFormat=timestampFormat, multiLine=multiLine,
             allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep,
             samplingRatio=samplingRatio, dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding,
-            locale=locale, recursiveFileLookup=recursiveFileLookup)
+            locale=locale, pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             path = [path]
         if type(path) == list:
@@ -309,8 +313,12 @@ def parquet(self, *paths, **options):
         Loads Parquet files, returning the result as a :class:`DataFrame`.

         :param mergeSchema: sets whether we should merge schemas collected from all
-                            Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``.
-                            The default value is specified in ``spark.sql.parquet.mergeSchema``.
+                            Parquet part-files. This will override
+                            ``spark.sql.parquet.mergeSchema``. The default value is specified in
+                            ``spark.sql.parquet.mergeSchema``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.
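For orientation, here is a minimal PySpark sketch of how the option added above is meant to be used on the batch JSON reader. The directory and glob below are hypothetical, not taken from this patch, and an active SparkSession is assumed:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Read only files whose paths match the glob; per the docstring added above,
# this does not change how partition discovery treats the directory layout.
df = spark.read.json("/data/events", pathGlobFilter="*.json")

# The generic option() form is equivalent, since the keyword is simply
# forwarded to the underlying reader options.
df2 = spark.read.option("pathGlobFilter", "*.json").json("/data/events")
```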
@@ -319,13 +327,16 @@ def parquet(self, *paths, **options):
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
         mergeSchema = options.get('mergeSchema', None)
+        pathGlobFilter = options.get('pathGlobFilter', None)
         recursiveFileLookup = options.get('recursiveFileLookup', None)
-        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter,
+                       recursiveFileLookup=recursiveFileLookup)
         return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))

     @ignore_unicode_prefix
     @since(1.6)
-    def text(self, paths, wholetext=False, lineSep=None, recursiveFileLookup=None):
+    def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None,
+             recursiveFileLookup=None):
         """
         Loads text files and returns a :class:`DataFrame` whose schema starts with a
         string column named "value", and followed by partitioned columns if there
@@ -338,6 +349,9 @@ def text(self, paths, wholetext=False, lineSep=None, recursiveFileLookup=None):
         :param wholetext: if true, read each file from input path(s) as a single row.
         :param lineSep: defines the line separator that should be used for parsing. If None is
                         set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -349,7 +363,8 @@ def text(self, paths, wholetext=False, lineSep=None, recursiveFileLookup=None):
         [Row(value=u'hello\\nthis')]
         """
         self._set_opts(
-            wholetext=wholetext, lineSep=lineSep, recursiveFileLookup=recursiveFileLookup)
+            wholetext=wholetext, lineSep=lineSep, pathGlobFilter=pathGlobFilter,
+            recursiveFileLookup=recursiveFileLookup)
         if isinstance(paths, basestring):
             paths = [paths]
         return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths)))
@@ -362,7 +377,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
             maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None,
             columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None,
             samplingRatio=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None,
-            recursiveFileLookup=None):
+            pathGlobFilter=None, recursiveFileLookup=None):
         r"""Loads a CSV file and returns the result as a :class:`DataFrame`.

         This function will go through the input once to determine the input schema if
@@ -470,6 +485,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
         :param lineSep: defines the line separator that should be used for parsing.
                         If None is set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
                         Maximum length is 1 character.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.
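Note that parquet() threads the new setting through **options rather than a dedicated keyword, while text() and csv() gain an explicit parameter. A hedged sketch of both call shapes, continuing from the previous snippet (paths and patterns are made up for illustration):

```python
# parquet() picks pathGlobFilter out of **options and forwards it to _set_opts().
parquet_df = spark.read.parquet("/data/warehouse/events", pathGlobFilter="*.parquet")

# csv() and text() expose it as a named keyword after this change.
csv_df = spark.read.csv("/data/logs", header=True, pathGlobFilter="*.csv")
```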
@@ -492,7 +510,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
             columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine,
             charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio,
             enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep,
-            recursiveFileLookup=recursiveFileLookup)
+            pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             path = [path]
         if type(path) == list:
@@ -520,20 +538,24 @@ def func(iterator):
             raise TypeError("path can be only string, list or RDD")

     @since(1.5)
-    def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
+    def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None):
         """Loads ORC files, returning the result as a :class:`DataFrame`.

         :param mergeSchema: sets whether we should merge schemas collected from all
-            ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
-            The default value is specified in ``spark.sql.orc.mergeSchema``.
+                            ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
+                            The default value is specified in ``spark.sql.orc.mergeSchema``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
-            disables `partition discovery`_.
+                                    disables `partition discovery`_.

         >>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
         >>> df.dtypes
         [('a', 'bigint'), ('b', 'int'), ('c', 'int')]
         """
-        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter,
+                       recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             path = [path]
         return self._df(self._jreader.orc(_to_seq(self._spark._sc, path)))
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 2937bc0bda70..f17a52f6b3dc 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -341,9 +341,6 @@ def option(self, key, value):
         * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
            in the JSON/CSV datasources or partition values. If it isn't set, it uses the
            default value, session local timezone.
-        * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching
-           the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
-           It does not change the behavior of partition discovery.

         .. note:: Evolving.

@@ -360,9 +357,6 @@ def options(self, **options):
         * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
            in the JSON/CSV datasources or partition values. If it isn't set, it uses the
            default value, session local timezone.
-        * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching
-           the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
-           It does not change the behavior of partition discovery.

         .. note:: Evolving.
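The two hunks above drop the generic ``pathGlobFilter`` note from DataStreamReader.option()/options() because the option is now documented per format; it remains reachable either way. A hedged streaming sketch (the schema and directory below are hypothetical):

```python
from pyspark.sql.types import StringType, StructField, StructType

schema = StructType([StructField("value", StringType())])

# Per-format keyword, mirroring the batch reader.
stream_df = spark.readStream.json("/data/incoming", schema=schema, pathGlobFilter="*.json")

# Generic option() form, still supported even though the docstring note moved
# to the per-format methods.
stream_df2 = (spark.readStream
              .format("json")
              .schema(schema)
              .option("pathGlobFilter", "*.json")
              .load("/data/incoming"))
```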
@@ -411,7 +405,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
              allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
              mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
              multiLine=None, allowUnquotedControlChars=None, lineSep=None, locale=None,
-             dropFieldIfAllNull=None, encoding=None, recursiveFileLookup=None):
+             dropFieldIfAllNull=None, encoding=None, pathGlobFilter=None,
+             recursiveFileLookup=None):
         """
         Loads a JSON file stream and returns the results as a :class:`DataFrame`.
@@ -487,6 +482,9 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
                          the JSON files. For example UTF-16BE, UTF-32LE. If None is set, the
                          encoding of input JSON will be detected automatically when the multiLine
                          option is set to ``true``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -507,21 +505,24 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
             timestampFormat=timestampFormat, multiLine=multiLine,
             allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep, locale=locale,
             dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding,
-            recursiveFileLookup=recursiveFileLookup)
+            pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.json(path))
         else:
             raise TypeError("path can be only a single string")

     @since(2.3)
-    def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
+    def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None):
         """Loads a ORC file stream, returning the result as a :class:`DataFrame`.

         .. note:: Evolving.

         :param mergeSchema: sets whether we should merge schemas collected from all
-            ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
-            The default value is specified in ``spark.sql.orc.mergeSchema``.
+                            ORC part-files. This will override ``spark.sql.orc.mergeSchema``.
+                            The default value is specified in ``spark.sql.orc.mergeSchema``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -531,22 +532,27 @@ def orc(self, path, mergeSchema=None, recursiveFileLookup=None):
         >>> orc_sdf.schema == sdf_schema
         True
         """
-        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter,
+                       recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.orc(path))
         else:
             raise TypeError("path can be only a single string")

     @since(2.0)
-    def parquet(self, path, mergeSchema=None, recursiveFileLookup=None):
+    def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None):
         """
         Loads a Parquet file stream, returning the result as a :class:`DataFrame`.

         .. note:: Evolving.

         :param mergeSchema: sets whether we should merge schemas collected from all
-                            Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``.
-                            The default value is specified in ``spark.sql.parquet.mergeSchema``.
+                            Parquet part-files. This will override
+                            ``spark.sql.parquet.mergeSchema``. The default value is specified in
+                            ``spark.sql.parquet.mergeSchema``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -556,7 +562,8 @@ def parquet(self, path, mergeSchema=None, recursiveFileLookup=None):
         >>> parquet_sdf.schema == sdf_schema
         True
         """
-        self._set_opts(mergeSchema=mergeSchema, recursiveFileLookup=recursiveFileLookup)
+        self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter,
+                       recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.parquet(path))
         else:
@@ -564,7 +571,8 @@ def parquet(self, path, mergeSchema=None, recursiveFileLookup=None):

     @ignore_unicode_prefix
     @since(2.0)
-    def text(self, path, wholetext=False, lineSep=None, recursiveFileLookup=None):
+    def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None,
+             recursiveFileLookup=None):
         """
         Loads a text file stream and returns a :class:`DataFrame` whose schema starts with a
         string column named "value", and followed by partitioned columns if there
@@ -579,6 +587,9 @@ def text(self, path, wholetext=False, lineSep=None, recursiveFileLookup=None):
         :param wholetext: if true, read each file from input path(s) as a single row.
         :param lineSep: defines the line separator that should be used for parsing. If None is
                         set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -589,7 +600,8 @@ def text(self, path, wholetext=False, lineSep=None, recursiveFileLookup=None):
         True
         """
         self._set_opts(
-            wholetext=wholetext, lineSep=lineSep, recursiveFileLookup=recursiveFileLookup)
+            wholetext=wholetext, lineSep=lineSep, pathGlobFilter=pathGlobFilter,
+            recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.text(path))
         else:
@@ -603,7 +615,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
             maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None,
             columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None,
             enforceSchema=None, emptyValue=None, locale=None, lineSep=None,
-            recursiveFileLookup=None):
+            pathGlobFilter=None, recursiveFileLookup=None):
         r"""Loads a CSV file stream and returns the result as a :class:`DataFrame`.

         This function will go through the input once to determine the input schema if
@@ -706,6 +718,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
         :param lineSep: defines the line separator that should be used for parsing.
                         If None is set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
                         Maximum length is 1 character.
+        :param pathGlobFilter: an optional glob pattern to only include files with paths matching
+                               the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`.
+                               It does not change the behavior of `partition discovery`_.
         :param recursiveFileLookup: recursively scan a directory for files. Using this option
                                     disables `partition discovery`_.

@@ -726,7 +741,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
             columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine,
             charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, enforceSchema=enforceSchema,
             emptyValue=emptyValue, locale=locale, lineSep=lineSep,
-            recursiveFileLookup=recursiveFileLookup)
+            pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup)
         if isinstance(path, basestring):
             return self._df(self._jreader.csv(path))
         else:
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 8570e4640fee..2d303b03ecd3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -98,9 +98,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   *
   *
   * @since 1.4.0
@@ -138,9 +135,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   *
   *
   * @since 1.4.0
@@ -157,9 +151,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   *
   *
   * @since 1.4.0
@@ -403,6 +394,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   * empty array/struct during schema inference.</li>
   * <li>`locale` (default is `en-US`): sets a locale as language tag in IETF BCP 47 format.
   * For instance, this is used while parsing dates and timestamps.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -640,6 +636,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   * For instance, this is used while parsing dates and timestamps.</li>
   * <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
   * that should be used for parsing. Maximum length is 1 character.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -666,7 +667,13 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   * <li>`mergeSchema` (default is the value specified in `spark.sql.parquet.mergeSchema`): sets
   * whether we should merge schemas collected from all Parquet part-files. This will override
   * `spark.sql.parquet.mergeSchema`.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
+  *
   * @since 1.4.0
   */
  @scala.annotation.varargs
@@ -688,6 +695,18 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
  /**
   * Loads ORC files and returns the result as a `DataFrame`.
   *
+  * You can set the following ORC-specific option(s) for reading ORC files:
+  *
+  *
   * @param paths input paths
   * @since 2.0.0
   */
@@ -736,6 +755,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   *
   * <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
   * that should be used for parsing.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @param paths input paths
   * @since 2.0.0
@@ -771,13 +795,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   * spark.read().textFile("/path/to/spark/README.md")
   * }}}
   *
-  * You can set the following textFile-specific option(s) for reading text files:
-  *
+  * You can set the text-specific options as specified in `DataFrameReader.text`.
   *
   * @param paths input path
   * @since 2.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 4a6516d325dd..cfe6192e7d5c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -83,9 +83,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   *
   *
   * @since 2.0.0
@@ -123,9 +120,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   *
   *
   * @since 2.0.0
@@ -142,9 +136,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   *
   *
   * @since 2.0.0
@@ -277,6 +268,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   * empty array/struct during schema inference.</li>
   * <li>`locale` (default is `en-US`): sets a locale as language tag in IETF BCP 47 format.
   * For instance, this is used while parsing dates and timestamps.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -357,6 +353,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   * For instance, this is used while parsing dates and timestamps.</li>
   * <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
   * that should be used for parsing. Maximum length is 1 character.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -370,6 +371,14 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   *
   *
   * @since 2.3.0
@@ -389,6 +398,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   * whether we should merge schemas collected from all
   * Parquet part-files. This will override
   * `spark.sql.parquet.mergeSchema`.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -419,6 +433,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   *
   * <li>`lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
   * that should be used for parsing.</li>
+  * <li>`pathGlobFilter`: an optional glob pattern to only include files with paths matching
+  * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter.
+  * It does not change the behavior of partition discovery.</li>
+  * <li>`recursiveFileLookup`: recursively scan a directory for files. Using this option
+  * disables partition discovery</li>
   * </ul>
   *
   * @since 2.0.0
@@ -442,15 +461,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
   * spark.readStream().textFile("/path/to/spark/README.md")
   * }}}
   *
-  * You can set the following text-specific options to deal with text files:
-  *
+  * You can set the text-specific options as specified in `DataStreamReader.text`.
   *
   * @param path input path
   * @since 2.1.0
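The Scala-side scaladoc above mirrors the Python documentation. To close, a hedged sketch of how the two options documented throughout this patch compose, written in PySpark for consistency with the earlier examples (the nested layout under /data/raw is made up):

```python
# recursiveFileLookup disables partition discovery and walks nested
# directories; pathGlobFilter then restricts which files are included,
# without otherwise changing partition discovery (per the docs above).
df = (spark.read
      .option("recursiveFileLookup", "true")
      .option("pathGlobFilter", "*.csv")
      .csv("/data/raw", header=True))
```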