From a19e8d0b2418755134e0ea0f19bca05ba1226a78 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Dec 2019 18:30:22 -0800 Subject: [PATCH 1/5] Make kwargs explicit in put, append --- pandas/core/generic.py | 6 +++ pandas/io/pytables.py | 95 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 89 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e19bf9c1c39ea..70bc2ae5beb1f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2412,6 +2412,8 @@ def to_hdf( complib: Optional[str] = None, append: bool_t = False, format: Optional[str] = None, + min_itemsize=None, + data_columns=None, errors: str = "strict", encoding: str = "UTF-8", **kwargs, @@ -2471,6 +2473,8 @@ def to_hdf( See the errors argument for :func:`open` for a full list of options. encoding : str, default "UTF-8" + min_itemsize : dict, optional + Map column names to minimum string sizes for columns. data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes @@ -2530,6 +2534,8 @@ def to_hdf( complib=complib, append=append, format=format, + min_itemsize=min_itemsize, + data_columns=data_columns, errors=errors, encoding=encoding, **kwargs, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 39e9d467b652f..8e284d23be637 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -259,6 +259,8 @@ def to_hdf( complib: Optional[str] = None, append: bool = False, format: Optional[str] = None, + min_itemsize=None, + data_columns=None, errors: str = "strict", encoding: str = "UTF-8", **kwargs, @@ -267,11 +269,25 @@ def to_hdf( if append: f = lambda store: store.append( - key, value, format=format, errors=errors, encoding=encoding, **kwargs + key, + value, + format=format, + min_itemsize=min_itemsize, + data_columns=data_columns, + errors=errors, + encoding=encoding, + **kwargs, ) else: f = lambda store: store.put( - key, value, format=format, errors=errors, encoding=encoding, **kwargs + key, + value, + format=format, + min_itemsize=min_itemsize, + data_columns=data_columns, + errors=errors, + encoding=encoding, + **kwargs, ) path_or_buf = _stringify_path(path_or_buf) @@ -957,7 +973,22 @@ def func(_start, _stop, _where): return it.get_result(coordinates=True) - def put(self, key: str, value, format=None, append=False, **kwargs): + def put( + self, + key: str, + value: FrameOrSeries, + format=None, + index=True, + append=False, + complib=None, + complevel: Optional[int] = None, + min_itemsize=None, + nan_rep=None, + data_columns=None, + encoding=None, + errors: str = "strict", + **kwargs, + ): """ Store object in HDFStore. 
@@ -986,8 +1017,22 @@ def put(self, key: str, value, format=None, append=False, **kwargs): """ if format is None: format = get_option("io.hdf.default_format") or "fixed" - kwargs = self._validate_format(format, kwargs) - self._write_to_group(key, value, append=append, **kwargs) + format = self._validate_format(format) + self._write_to_group( + key, + value, + format=format, + index=index, + append=append, + complib=complib, + complevel=complevel, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + encoding=encoding, + errors=errors, + **kwargs, + ) def remove(self, key: str, where=None, start=None, stop=None): """ @@ -1046,11 +1091,21 @@ def remove(self, key: str, where=None, start=None, stop=None): def append( self, key: str, - value, + value: FrameOrSeries, format=None, + axes=None, + index=True, append=True, + complib=None, + complevel: Optional[int] = None, columns=None, + min_itemsize=None, + chunksize=None, + expectedrows=None, dropna: Optional[bool] = None, + data_columns=None, + encoding=None, + errors: str = "strict", **kwargs, ): """ @@ -1096,8 +1151,25 @@ def append( dropna = get_option("io.hdf.dropna_table") if format is None: format = get_option("io.hdf.default_format") or "table" - kwargs = self._validate_format(format, kwargs) - self._write_to_group(key, value, append=append, dropna=dropna, **kwargs) + format = self._validate_format(format) + self._write_to_group( + key, + value, + format=format, + axes=axes, + index=index, + append=append, + complib=complib, + complevel=complevel, + min_itemsize=min_itemsize, + chunksize=chunksize, + expectedrows=expectedrows, + dropna=dropna, + data_columns=data_columns, + encoding=encoding, + errors=errors, + **kwargs, + ) def append_to_multiple( self, @@ -1418,17 +1490,16 @@ def _check_if_open(self): if not self.is_open: raise ClosedFileError(f"{self._path} file is not open!") - def _validate_format(self, format: str, kwargs: Dict[str, Any]) -> Dict[str, Any]: + def _validate_format(self, format: str) -> str: """ validate / deprecate formats; return the new kwargs """ - kwargs = kwargs.copy() # validate try: - kwargs["format"] = _FORMAT_MAP[format.lower()] + format = _FORMAT_MAP[format.lower()] except KeyError: raise TypeError(f"invalid HDFStore format specified [{format}]") - return kwargs + return format def _create_storer( self, From 65b061123e0be83a321a6dc60b88a055779900bd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 08:20:57 -0800 Subject: [PATCH 2/5] docstring fix --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 70bc2ae5beb1f..1803093247382 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2473,7 +2473,7 @@ def to_hdf( See the errors argument for :func:`open` for a full list of options. encoding : str, default "UTF-8" - min_itemsize : dict, optional + min_itemsize : dict or int, optional Map column names to minimum string sizes for columns. 
data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk From 47ec657b8dde264409134cd9528c8df8ca4a0994 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 10:17:50 -0800 Subject: [PATCH 3/5] types, remove kwargs --- pandas/core/generic.py | 4 ++-- pandas/io/pytables.py | 18 +++++++----------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1803093247382..eab4562cd8302 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2412,8 +2412,8 @@ def to_hdf( complib: Optional[str] = None, append: bool_t = False, format: Optional[str] = None, - min_itemsize=None, - data_columns=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", **kwargs, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fefa96d6483dc..4e5ef6bc53bab 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -259,8 +259,8 @@ def to_hdf( complib: Optional[str] = None, append: bool = False, format: Optional[str] = None, - min_itemsize=None, - data_columns=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", **kwargs, @@ -980,12 +980,11 @@ def put( append=False, complib=None, complevel: Optional[int] = None, - min_itemsize=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, nan_rep=None, - data_columns=None, + data_columns: Optional[List[str]] = None, encoding=None, errors: str = "strict", - **kwargs, ): """ Store object in HDFStore. @@ -1029,7 +1028,6 @@ def put( data_columns=data_columns, encoding=encoding, errors=errors, - **kwargs, ) def remove(self, key: str, where=None, start=None, stop=None): @@ -1097,14 +1095,13 @@ def append( complib=None, complevel: Optional[int] = None, columns=None, - min_itemsize=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, chunksize=None, expectedrows=None, dropna: Optional[bool] = None, - data_columns=None, + data_columns: Optional[List[str]] = None, encoding=None, errors: str = "strict", - **kwargs, ): """ Append to Table in file. 
Node must already exist and be Table @@ -1166,7 +1163,6 @@ def append( data_columns=data_columns, encoding=encoding, errors=errors, - **kwargs, ) def append_to_multiple( @@ -1609,7 +1605,7 @@ def _write_to_group( complib=None, complevel: Optional[int] = None, fletcher32=None, - min_itemsize=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, chunksize=None, expectedrows=None, dropna=False, From 9954d20c490816c42993726c1d9dac04ab0569e7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 10:30:06 -0800 Subject: [PATCH 4/5] remove kwargs --- pandas/core/generic.py | 15 ++++++++------- pandas/io/pytables.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eab4562cd8302..7eb29265cac97 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ from pandas._config import config -from pandas._libs import Timestamp, iNaT, properties +from pandas._libs import Timestamp, iNaT, lib, properties from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -2412,11 +2412,12 @@ def to_hdf( complib: Optional[str] = None, append: bool_t = False, format: Optional[str] = None, + index: bool_t = True, min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=lib._no_default, data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", - **kwargs, ): """ Write the contained data to an HDF5 file using HDFStore. @@ -2475,15 +2476,14 @@ def to_hdf( encoding : str, default "UTF-8" min_itemsize : dict or int, optional Map column names to minimum string sizes for columns. + nan_rep : Any, optional + How to represent null values as str. + Not allowed with append=True. data_columns : list of columns or True, optional List of columns to create as indexed data columns for on-disk queries, or True to use all columns. By default only the axes of the object are indexed. See :ref:`io.hdf5-query-data-columns`. Applicable only to format='table'. - fletcher32 : bool, default False - If applying compression use the fletcher32 checksum. - dropna : bool, default False - If true, ALL nan rows will not be written to store. 
See Also -------- @@ -2534,11 +2534,12 @@ def to_hdf( complib=complib, append=append, format=format, + index=index, min_itemsize=min_itemsize, + nan_rep=nan_rep, data_columns=data_columns, errors=errors, encoding=encoding, - **kwargs, ) def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4e5ef6bc53bab..ef25839b692a9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -259,35 +259,40 @@ def to_hdf( complib: Optional[str] = None, append: bool = False, format: Optional[str] = None, + index: bool = True, min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=lib._no_default, data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", - **kwargs, ): """ store this object, close it if we opened it """ if append: + if nan_rep is not lib._no_default: + raise ValueError("Cannot pass nan_rep with append=True") f = lambda store: store.append( key, value, format=format, + index=index, min_itemsize=min_itemsize, data_columns=data_columns, errors=errors, encoding=encoding, - **kwargs, ) else: + nan_rep = None if nan_rep is lib._no_default else nan_rep f = lambda store: store.put( key, value, format=format, + index=index, min_itemsize=min_itemsize, + nan_rep=nan_rep, data_columns=data_columns, errors=errors, encoding=encoding, - **kwargs, ) path_or_buf = _stringify_path(path_or_buf) From 5be49457b9680afd29c40be3986c143079f6a880 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Dec 2019 16:05:19 -0800 Subject: [PATCH 5/5] troubleshoot docs build --- pandas/core/generic.py | 6 ++++-- pandas/io/pytables.py | 11 +++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7eb29265cac97..8f8cfe7f6618b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ from pandas._config import config -from pandas._libs import Timestamp, iNaT, lib, properties +from pandas._libs import Timestamp, iNaT, properties from pandas.compat import set_function_name from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv @@ -2414,7 +2414,8 @@ def to_hdf( format: Optional[str] = None, index: bool_t = True, min_itemsize: Optional[Union[int, Dict[str, int]]] = None, - nan_rep=lib._no_default, + nan_rep=None, + dropna: Optional[bool_t] = None, data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", @@ -2537,6 +2538,7 @@ def to_hdf( index=index, min_itemsize=min_itemsize, nan_rep=nan_rep, + dropna=dropna, data_columns=data_columns, errors=errors, encoding=encoding, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fdd6f8e080160..6d64e5113bf44 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -261,7 +261,8 @@ def to_hdf( format: Optional[str] = None, index: bool = True, min_itemsize: Optional[Union[int, Dict[str, int]]] = None, - nan_rep=lib._no_default, + nan_rep=None, + dropna: Optional[bool] = None, data_columns: Optional[List[str]] = None, errors: str = "strict", encoding: str = "UTF-8", @@ -269,20 +270,20 @@ def to_hdf( """ store this object, close it if we opened it """ if append: - if nan_rep is not lib._no_default: - raise ValueError("Cannot pass nan_rep with append=True") f = lambda store: store.append( key, value, format=format, index=index, min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, data_columns=data_columns, errors=errors, encoding=encoding, ) else: - 
nan_rep = None if nan_rep is lib._no_default else nan_rep + # NB: dropna is not passed to `put` f = lambda store: store.put( key, value, @@ -1123,6 +1124,7 @@ def append( complevel: Optional[int] = None, columns=None, min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=None, chunksize=None, expectedrows=None, dropna: Optional[bool] = None, @@ -1184,6 +1186,7 @@ def append( complib=complib, complevel=complevel, min_itemsize=min_itemsize, + nan_rep=nan_rep, chunksize=chunksize, expectedrows=expectedrows, dropna=dropna,
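For reference, below is a minimal usage sketch of the keywords this series makes explicit in put, append, and to_hdf. The file name, key names, column sizes, and sample data are illustrative assumptions (not taken from the patches); the sketch assumes PyTables is installed and the working directory is writable. The calls themselves work both before and after this series -- the patches only move these keywords out of **kwargs and into the signatures, so misspelled keyword names fail loudly and the signatures are introspectable.

    import pandas as pd

    df = pd.DataFrame({"name": ["alpha", "beta"], "value": [1.0, 2.0]})

    with pd.HDFStore("demo.h5", mode="w") as store:
        # put(): min_itemsize / data_columns passed explicitly instead of via **kwargs
        store.put(
            "frame",
            df,
            format="table",
            min_itemsize={"name": 20},   # reserve 20 characters for the string column
            data_columns=["name"],       # make "name" queryable on disk
        )
        # append(): same explicit keywords; rows are added to the existing table
        store.append("frame", df, format="table", min_itemsize={"name": 20})

    # DataFrame.to_hdf forwards the same keywords through pandas.io.pytables.to_hdf
    df.to_hdf("demo.h5", key="frame2", format="table",
              min_itemsize={"name": 20}, data_columns=["name"])

    # data_columns is what enables on-disk selection against that column
    subset = pd.read_hdf("demo.h5", "frame", where='name == "alpha"')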