From 30926968434eb3f3d62e35a6df9488753900bce6 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 18 Dec 2019 18:02:35 -0800
Subject: [PATCH 1/2] REF: change parameter name fname -> path

---
 doc/source/whatsnew/v1.0.0.rst |  3 ++-
 pandas/core/frame.py           | 37 ++++++++++++++++++++++++----------
 pandas/io/feather_format.py    |  7 ++++---
 pandas/io/parquet.py           | 21 ++++++++++++-------
 4 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index c072bfeff4a72..802b207100398 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -502,7 +502,8 @@ Deprecations
 - :func:`pandas.json_normalize` is now exposed in the top-level namespace. Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
--
+- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
+
 
 .. _whatsnew_1000.prior_deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 394d128164509..6f760e7ee4ca0 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -37,7 +37,12 @@
 from pandas._libs import algos as libalgos, lib
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
+from pandas.util._decorators import (
+    Appender,
+    Substitution,
+    deprecate_kwarg,
+    rewrite_axis_style_signature,
+)
 from pandas.util._validators import (
     validate_axis_style_args,
     validate_bool_kwarg,
@@ -1829,9 +1834,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
         mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
         return cls(mgr)
 
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_stata(
         self,
-        fname,
+        path,
         convert_dates=None,
         write_index=True,
         byteorder=None,
@@ -1849,11 +1855,16 @@
 
         Parameters
         ----------
-        fname : str, buffer or path object
+        path : str, buffer or path object
             String, path object (pathlib.Path or py._path.local.LocalPath) or
             object implementing a binary write() function. If using a buffer
             then the buffer will not be automatically closed after the
             file data has been written.
+
+            .. versionchanged:: 1.0.0
+
+            Previously this was "fname"
+
         convert_dates : dict
             Dictionary mapping columns containing datetime types to stata
             internal format to use when writing the dates. Options are 'tc',
@@ -1927,7 +1938,7 @@
             kwargs["convert_strl"] = convert_strl
 
         writer = statawriter(
-            fname,
+            path,
             self,
             convert_dates=convert_dates,
             byteorder=byteorder,
@@ -1939,22 +1950,24 @@
         )
         writer.write_file()
 
-    def to_feather(self, fname):
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
+    def to_feather(self, path):
         """
         Write out the binary feather-format for DataFrames.
 
         Parameters
         ----------
-        fname : str
+        path : str
             String file path.
         """
         from pandas.io.feather_format import to_feather
 
-        to_feather(self, fname)
+        to_feather(self, path)
 
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,
-        fname,
+        path,
         engine="auto",
         compression="snappy",
         index=None,
@@ -1973,11 +1986,13 @@
 
         Parameters
         ----------
-        fname : str
+        path : str
             File path or Root Directory path. Will be used as Root
             Directory path while writing a partitioned dataset.
 
-        .. versionchanged:: 0.24.0
+        .. versionchanged:: 1.0.0
+
+            Previously this was "fname"
 
         engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
             Parquet library to use. If 'auto', then the option
@@ -2034,7 +2049,7 @@
 
         to_parquet(
             self,
-            fname,
+            path,
             engine,
             compression=compression,
             index=index,
diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 01118d7b7cd3e..41bdf97c1fe1f 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -34,10 +34,11 @@ def to_feather(df: DataFrame, path):
 
     # raise on anything else as we don't serialize the index
     if not isinstance(df.index, Int64Index):
+        typ = type(df.index)
         raise ValueError(
-            "feather does not support serializing {} "
+            f"feather does not support serializing {typ} "
             "for the index; you can .reset_index() "
-            "to make the index into column(s)".format(type(df.index))
+            "to make the index into column(s)"
         )
 
     if not df.index.equals(RangeIndex.from_range(range(len(df)))):
@@ -63,7 +64,7 @@ def to_feather(df: DataFrame, path):
     feather.write_feather(df, path)
 
 
-def read_feather(path, columns=None, use_threads=True):
+def read_feather(path, columns=None, use_threads: bool = True):
     """
     Load a feather-format object from the file path.
 
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 54e44ff33d079..f68347f042086 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -46,7 +46,7 @@ def get_engine(engine: str) -> "BaseImpl":
 
 class BaseImpl:
     @staticmethod
-    def validate_dataframe(df):
+    def validate_dataframe(df: DataFrame):
         if not isinstance(df, DataFrame):
             raise ValueError("to_parquet only supports IO with DataFrames")
 
@@ -62,7 +62,7 @@ def validate_dataframe(df):
         if not valid_names:
             raise ValueError("Index level names must be strings")
 
-    def write(self, df, path, compression, **kwargs):
+    def write(self, df: DataFrame, path, compression, **kwargs):
         raise AbstractMethodError(self)
 
     def read(self, path, columns=None, **kwargs):
@@ -80,7 +80,7 @@ def __init__(self):
 
     def write(
         self,
-        df,
+        df: DataFrame,
         path,
         compression="snappy",
         coerce_timestamps="ms",
@@ -137,7 +137,13 @@ def __init__(self):
         self.api = fastparquet
 
     def write(
-        self, df, path, compression="snappy", index=None, partition_cols=None, **kwargs
+        self,
+        df: DataFrame,
+        path,
+        compression="snappy",
+        index=None,
+        partition_cols=None,
+        **kwargs,
     ):
         self.validate_dataframe(df)
         # thriftpy/protocol/compact.py:339:
@@ -196,9 +202,9 @@ def read(self, path, columns=None, **kwargs):
 
 
 def to_parquet(
-    df,
+    df: DataFrame,
     path,
-    engine="auto",
+    engine: str = "auto",
     compression="snappy",
     index: Optional[bool] = None,
     partition_cols=None,
@@ -209,6 +215,7 @@ def to_parquet(
 
     Parameters
     ----------
+    df : DataFrame
     path : str
         File path or Root Directory path. Will be used as Root Directory
         path while writing a partitioned dataset.
@@ -255,7 +262,7 @@ def to_parquet(
     )
 
 
-def read_parquet(path, engine="auto", columns=None, **kwargs):
+def read_parquet(path, engine: str = "auto", columns=None, **kwargs):
     """
     Load a parquet object from the file path, returning a DataFrame.
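
Note for reviewers: the shim doing the work in this patch is
pandas.util._decorators.deprecate_kwarg. For readers without that file
handy, here is a minimal sketch of the behaviour the decorator adds. It
is illustrative only, not the real implementation, which also supports
value mappings and a configurable stacklevel:

    import functools
    import warnings

    def deprecate_kwarg(old_arg_name, new_arg_name):
        # Sketch only: forward the deprecated keyword to its new name
        # with a FutureWarning; reject calls that pass both spellings.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if old_arg_name in kwargs:
                    if new_arg_name in kwargs:
                        raise TypeError(
                            f"Can only specify {old_arg_name!r} "
                            f"or {new_arg_name!r}, not both"
                        )
                    warnings.warn(
                        f"the {old_arg_name!r} keyword is deprecated, "
                        f"use {new_arg_name!r} instead",
                        FutureWarning,
                        stacklevel=2,
                    )
                    kwargs[new_arg_name] = kwargs.pop(old_arg_name)
                return func(*args, **kwargs)
            return wrapper
        return decorator

With the patch applied, existing callers keep working but are nudged
toward the new name:

    df.to_parquet(fname="out.parquet")  # FutureWarning: use "path" instead
    df.to_parquet(path="out.parquet")   # new spelling, no warning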
From 6eebfb2de95c5a7cac56affbbd951b0a1f4e83b4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 08:19:04 -0800 Subject: [PATCH 2/2] doc fixup --- doc/source/user_guide/io.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ae0f02312e1df..7f7b00ccfc167 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns .. ipython:: python df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) - df.to_parquet(fname='test', engine='pyarrow', + df.to_parquet(path='test', engine='pyarrow', partition_cols=['a'], compression=None) -The `fname` specifies the parent directory to which data will be saved. +The `path` specifies the parent directory to which data will be saved. The `partition_cols` are the column names by which the dataset will be partitioned. Columns are partitioned in the order they are given. The partition splits are determined by the unique values in the partition columns.
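
Note for reviewers: a quick way to sanity-check the renamed keyword and
the partitioned layout described in the io.rst hunk above. This assumes
pyarrow is installed; the a=0/a=1 directory names follow pyarrow's
hive-style partitioning scheme:

    import os
    import pandas as pd

    df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
    df.to_parquet(path='test', engine='pyarrow',
                  partition_cols=['a'], compression=None)

    # One subdirectory per unique value of the partition column:
    print(sorted(d for d in os.listdir('test') if d.startswith('a=')))
    # ['a=0', 'a=1']

    # Reading the directory back restores the partition column.
    round_tripped = pd.read_parquet('test', engine='pyarrow')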