From 5a5b347187aa237569c970bf3d6d8c3edbd6e2dc Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Wed, 18 Dec 2019 23:06:34 +0100 Subject: [PATCH 01/37] add original changes. --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/io/json/_json.py | 4 ++++ pandas/tests/io/json/test_pandas.py | 11 +++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6ad6b5129ef5a..c978a1825a390 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -501,6 +501,7 @@ Deprecations - :func:`pandas.json_normalize` is now exposed in the top-level namespace. Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). +- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). - .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 30c1c2d59e983..f73a314d4da29 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -27,6 +27,7 @@ ) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer +from pandas.util._decorators import deprecate_kwarg from ._normalize import convert_to_line_delimits from ._table_schema import build_table_schema, parse_table_schema @@ -353,6 +354,7 @@ def _write( return serialized +@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) def read_json( path_or_buf=None, orient=None, @@ -466,6 +468,8 @@ def read_json( non-numeric column and index labels are supported. Note also that the JSON ordering MUST be the same for each term if numpy=True. + .. deprecated:: 1.0.0 + precise_float : bool, default False Set to enable usage of higher precision (strtod) function when decoding string to double values. 
Default (False) is to use fast but diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index bce3d1de849aa..cce9a52e5077d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,6 +1,7 @@ from collections import OrderedDict from datetime import timedelta from io import StringIO +from warnings import catch_warnings, filterwarnings import json import os @@ -1601,3 +1602,13 @@ def test_json_indent_all_orients(self, orient, expected): def test_json_negative_indent_raises(self): with pytest.raises(ValueError, match="must be a nonnegative integer"): pd.DataFrame().to_json(indent=-1) + + @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") + def test_deprecate_numpy_argument_read_json(self): + # https://github.com/pandas-dev/pandas/issues/28512 + df = DataFrame([1, 2, 3]) + with tm.assert_produces_warning(None): + with catch_warnings(): + filterwarnings("ignore", category=FutureWarning) + result = read_json(df.to_json(), numpy=True) + assert_frame_equal(result, df) From 18bd98fde954806cf0247aa961cc2e9e567871ba Mon Sep 17 00:00:00 2001 From: proost Date: Thu, 19 Dec 2019 07:20:11 +0900 Subject: [PATCH 02/37] ENH: Add support for DataFrame(Categorical) (#11363) (#30305) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 2 +- pandas/tests/frame/test_constructors.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6ad6b5129ef5a..7e623523a9fbe 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -206,6 +206,7 @@ Other enhancements now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. 
(:issue: `30270`) +- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) Build Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b699961cf07e8..394d128164509 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -450,7 +450,7 @@ def __init__( # For data is list-like, or Iterable (will consume into list) elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)): - if not isinstance(data, abc.Sequence): + if not isinstance(data, (abc.Sequence, ExtensionArray)): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ad6e0c963e730..adec846802e66 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -25,6 +25,7 @@ date_range, isna, ) +from pandas.arrays import IntervalArray, PeriodArray from pandas.core.construction import create_series_with_explicit_dtype import pandas.util.testing as tm @@ -2396,6 +2397,21 @@ class List(list): result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])])) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "extension_arr", + [ + Categorical(list("aabbc")), + pd.SparseArray([1, np.nan, np.nan, np.nan]), + IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), + PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), + ], + ) + def test_constructor_with_extension_array(self, extension_arr): + # GH11363 + expected = DataFrame(Series(extension_arr)) + result = DataFrame(extension_arr) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self): From 416907d1c38c027d015ec1ba80923b1dbd84ebf6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 18 Dec 2019 16:57:04 -0600 Subject: [PATCH 03/37] DOC: whatsnew fixups (#30331) * Refer to public API * Fixed formatting * Remove ref to undocumented function --- doc/source/whatsnew/v1.0.0.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7e623523a9fbe..c072bfeff4a72 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -255,10 +255,10 @@ To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. mi2 = mi.set_names("new name", level=0) mi2.names -New repr for :class:`pandas.core.arrays.IntervalArray` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +New repr for :class:`~pandas.arrays.IntervalArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) +- :class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) *pandas 0.25.x* @@ -578,7 +578,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) - :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors no longer allow ``start``, ``end``, and ``periods`` keywords, use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) - :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors no longer have a ``verify_integrity`` keyword argument (:issue:`23919`) -- :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword(:issue:`19265`) +- ``pandas.core.internals.blocks.make_block`` no longer accepts the "fastpath" keyword(:issue:`19265`) - :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) - Removed the previously deprecated :meth:`MultiIndex.to_hierarchical` (:issue:`21613`) @@ -655,7 +655,7 @@ Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) -- Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`) +- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) - Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) - Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) From f36eac1718ef784ead396118aec6893d17e0e5e8 Mon Sep 17 00:00:00 2001 From: DorAmram Date: Thu, 19 Dec 2019 01:08:53 +0200 Subject: [PATCH 04/37] CLN: changed .format to f-string in pandas/core/dtypes (#30287) --- pandas/core/dtypes/common.py | 10 ++++------ pandas/core/dtypes/dtypes.py | 9 ++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 602d7d0da95e6..8e629896fdb7b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -193,9 +193,7 @@ def ensure_python_int(value: Union[int, np.integer]) -> int: TypeError: if the value isn't an int or can't be converted to one. """ if not is_scalar(value): - raise TypeError( - "Value needs to be a scalar value, was type {}".format(type(value)) - ) + raise TypeError(f"Value needs to be a scalar value, was type {type(value)}") msg = "Wrong type {} for value {}" try: new_value = int(value) @@ -1859,7 +1857,7 @@ def _validate_date_like_dtype(dtype) -> None: try: typ = np.datetime_data(dtype)[0] except ValueError as e: - raise TypeError("{error}".format(error=e)) + raise TypeError(e) if typ != "generic" and typ != "ns": raise ValueError( f"{repr(dtype.name)} is too specific of a frequency, " @@ -1900,7 +1898,7 @@ def pandas_dtype(dtype): npdtype = np.dtype(dtype) except SyntaxError: # np.dtype uses `eval` which can raise SyntaxError - raise TypeError("data type '{}' not understood".format(dtype)) + raise TypeError(f"data type '{dtype}' not understood") # Any invalid dtype (such as pd.Timestamp) should raise an error. # np.dtype(invalid_type).kind = 0 for such objects. 
However, this will @@ -1912,6 +1910,6 @@ def pandas_dtype(dtype): # here and `dtype` is an array return npdtype elif npdtype.kind == "O": - raise TypeError("dtype '{}' not understood".format(dtype)) + raise TypeError(f"dtype '{dtype}' not understood") return npdtype diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 77ec182be5ed4..6f8f6e8abbc0a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -749,7 +749,7 @@ def construct_from_string(cls, string: str_type): raise TypeError("Cannot construct a 'DatetimeTZDtype'") def __str__(self) -> str_type: - return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) + return f"datetime64[{self.unit}, {self.tz}]" @property def name(self) -> str_type: @@ -890,7 +890,7 @@ def __str__(self) -> str_type: @property def name(self) -> str_type: - return "period[{freq}]".format(freq=self.freq.freqstr) + return f"period[{self.freq.freqstr}]" @property def na_value(self): @@ -1054,8 +1054,7 @@ def construct_from_string(cls, string): if its not possible """ if not isinstance(string, str): - msg = "a string needs to be passed, got type {typ}" - raise TypeError(msg.format(typ=type(string))) + raise TypeError(f"a string needs to be passed, got type {type(string)}") if string.lower() == "interval" or cls._match.search(string) is not None: return cls(string) @@ -1075,7 +1074,7 @@ def type(self): def __str__(self) -> str_type: if self.subtype is None: return "interval" - return "interval[{subtype}]".format(subtype=self.subtype) + return f"interval[{self.subtype}]" def __hash__(self) -> int: # make myself hashable From 70a083f044c6cbe153c610647b1224188e31b979 Mon Sep 17 00:00:00 2001 From: Brian Wignall Date: Thu, 19 Dec 2019 01:42:17 -0500 Subject: [PATCH 05/37] Fix typos, via a Levenshtein-style corrector (#30341) --- pandas/_config/config.py | 4 ++-- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/index.pyx | 2 +- pandas/_libs/lib.pyx | 2 +- pandas/_libs/src/klib/khash.h | 2 +- pandas/_libs/src/ujson/lib/ultrajsondec.c | 6 +++--- pandas/_libs/src/ujson/python/objToJSON.c | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/core/arrays/sparse/dtype.py | 2 +- pandas/core/arrays/string_.py | 4 ++-- pandas/core/groupby/base.py | 2 +- pandas/core/ops/mask_ops.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/io/gbq.py | 2 +- pandas/io/json/_json.py | 2 +- pandas/io/parsers.py | 2 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/indexes/datetimes/test_date_range.py | 4 ++-- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/indexes/timedeltas/test_indexing.py | 4 ++-- pandas/tests/plotting/test_converter.py | 2 +- 21 files changed, 27 insertions(+), 27 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 9e74eb46f7b1f..6844df495547a 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -57,10 +57,10 @@ DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb") -# holds deprecated option metdata +# holds deprecated option metadata _deprecated_options: Dict[str, DeprecatedOption] = {} -# holds registered option metdata +# holds registered option metadata _registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9e5fa75ebeceb..abb8a6d388d26 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -791,7 
+791,7 @@ def group_quantile(ndarray[float64_t] out, out[i] = NaN else: # Calculate where to retrieve the desired value - # Casting to int will intentionaly truncate result + # Casting to int will intentionally truncate result idx = grp_start + (q * (non_na_sz - 1)) val = values[sort_arr[idx]] diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 0ed48efb03035..ac8172146d351 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -288,7 +288,7 @@ cdef class IndexEngine: def get_indexer_non_unique(self, targets): """ - Return an indexer suitable for takng from a non unique index + Return an indexer suitable for taking from a non unique index return the labels in the same order ast the target and a missing indexer into the targets (which correspond to the -1 indices in the results diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e9a486894fbf0..3f578a453aa1d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -510,7 +510,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): @cython.boundscheck(False) def array_equivalent_object(left: object[:], right: object[:]) -> bool: """ - Perform an element by element comparion on 1-d object arrays + Perform an element by element comparison on 1-d object arrays taking into account nan positions. """ cdef: diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index 77ec519cc24da..bcf6350aa9090 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -498,7 +498,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) */ #define kh_n_buckets(h) ((h)->n_buckets) -/* More conenient interfaces */ +/* More convenient interfaces */ /*! @function @abstract Instantiate a hash set containing integer keys diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index a847b0f5d5102..26b00c0cacd31 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -150,7 +150,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { case '7': case '8': case '9': { - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here // PERF: Don't do 64-bit arithmetic here unless we know we have // to intValue = intValue * 10ULL + (JSLONG)(chr - 48); @@ -235,7 +235,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_FRC_LOOP: - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( @@ -282,7 +282,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_EXP_LOOP: - // FIXME: Check for arithemtic overflow here + // FIXME: Check for arithmetic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 5d17d3a2d7bcb..37e9c36a85327 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1632,7 +1632,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, sprintf(buf, "%" NPY_INT64_FMT, value); len = strlen(cLabel); } - } else { // Fallack to string representation + } else { // Fallback to string representation PyObject *str = PyObject_Str(item); if (str == NULL) { Py_DECREF(item); diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 
e4e7f65db8dea..a44f374264f09 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -324,7 +324,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implmented." + raise NotImplementedError("Timestamp.strptime() is not implemented." "Use to_datetime() to parse date strings.") @classmethod diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 935f657416396..ce16a1620eed5 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -290,7 +290,7 @@ def update_dtype(self, dtype): Returns ------- SparseDtype - A new SparseDtype with the corret `dtype` and fill value + A new SparseDtype with the correct `dtype` and fill value for that `dtype`. Raises diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0d30aa06cd466..de254f662bb32 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -86,7 +86,7 @@ def __from_arrow__(self, array): results = [] for arr in chunks: - # using _from_sequence to ensure None is convered to NA + # using _from_sequence to ensure None is converted to NA str_arr = StringArray._from_sequence(np.array(arr)) results.append(str_arr) @@ -153,7 +153,7 @@ class StringArray(PandasArray): ... ValueError: StringArray requires an object-dtype ndarray of strings. - For comparision methods, this returns a :class:`pandas.BooleanArray` + For comparison methods, this returns a :class:`pandas.BooleanArray` >>> pd.array(["a", None, "c"], dtype="string") == "a" diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index e088400b25f0f..700d8d503d086 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -1,5 +1,5 @@ """ -Provide basic components for groupby. These defintiions +Provide basic components for groupby. These definitions hold the whitelist of methods that are exposed on the SeriesGroupBy and the DataFrameGroupBy objects. """ diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index fd91e78451da9..8fb81faf313d7 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -1,5 +1,5 @@ """ -Ops for masked ararys. +Ops for masked arrays. """ from typing import Optional, Union diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 109df6584641d..124bd31c8d308 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1640,7 +1640,7 @@ def _get_format_datetime64_from_values( """ given values and a date_format, return a string format """ if isinstance(values, np.ndarray) and values.ndim > 1: - # We don't actaully care about the order of values, and DatetimeIndex + # We don't actually care about the order of values, and DatetimeIndex # only accepts 1D values values = values.ravel() diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 8a4a72021eb43..d9711f4f4626a 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -65,7 +65,7 @@ def read_gbq( *New in version 0.2.0 of pandas-gbq*. dialect : str, default 'legacy' - Note: The default value is changing to 'standard' in a future verion. + Note: The default value is changing to 'standard' in a future version. SQL syntax dialect to use. 
Value can be one of: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 30c1c2d59e983..6cb811bb97755 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -314,7 +314,7 @@ def __init__( timedeltas = obj.select_dtypes(include=["timedelta"]).columns if len(timedeltas): obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) - # Convert PeriodIndex to datetimes before serialzing + # Convert PeriodIndex to datetimes before serializing if is_period_dtype(obj.index): obj.index = obj.index.to_timestamp() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c87edcc602686..a887a537a2201 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3492,7 +3492,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): # 2) index_names (column names) # # Both must be non-null to ensure a successful construction. Otherwise, - # we have to create a generic emtpy Index. + # we have to create a generic empty Index. if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 46ca7bd8f760a..014581682ac59 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -183,7 +183,7 @@ def _values_for_factorize(self): def _values_for_argsort(self): # Disable NumPy's shape inference by including an empty tuple... - # If all the elemnts of self are the same size P, NumPy will + # If all the elements of self are the same size P, NumPy will # cast them to an (N, P) array, instead of an (N,) array of tuples. frozen = [()] + [tuple(x.items()) for x in self] return np.array(frozen, dtype=object)[1:] diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index f95137cd1bf88..36cdaa8a6029b 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -798,7 +798,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011") rng2 = bdate_range("12/2/2011", "12/5/2011") - rng2._data.freq = BDay() # TODO: shouldnt this already be set? + rng2._data.freq = BDay() # TODO: shouldn't this already be set? result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) @@ -855,7 +855,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") - rng2._data.freq = CDay() # TODO: shouldnt this already be set? + rng2._data.freq = CDay() # TODO: shouldn't this already be set? 
result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 08c14c36a195e..6e919571d1423 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1061,7 +1061,7 @@ class TestToDatetimeUnit: @pytest.mark.parametrize("cache", [True, False]) def test_unit(self, cache): # GH 11758 - # test proper behavior with erros + # test proper behavior with errors with pytest.raises(ValueError): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index d24f91a2c9e13..17ab85033acfb 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -228,7 +228,7 @@ def test_insert(self): def test_delete(self): idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") - # prserve freq + # preserve freq expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx") expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") @@ -257,7 +257,7 @@ def test_delete(self): def test_delete_slice(self): idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") - # prserve freq + # preserve freq expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 5cea4fb5acca0..71a186dc2f3b0 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -84,7 +84,7 @@ def test_matplotlib_formatters(self): units = pytest.importorskip("matplotlib.units") # Can't make any assertion about the start state. - # We we check that toggling converters off remvoes it, and toggling it + # We we check that toggling converters off removes it, and toggling it # on restores it. 
with cf.option_context("plotting.matplotlib.register_converters", True): From 20e4c186191859dcde2437edacaee43d8d34dc46 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 19 Dec 2019 13:37:06 +0000 Subject: [PATCH 06/37] TYPING: Enable --check-untyped-defs for MyPy (#29493) --- setup.cfg | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/setup.cfg b/setup.cfg index 62d9f2e6056bb..c7d3394568f9c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -122,6 +122,10 @@ skip = pandas/__init__.py,pandas/core/api.py [mypy] ignore_missing_imports=True no_implicit_optional=True +check_untyped_defs=True + +[mypy-pandas.tests.*] +check_untyped_defs=False [mypy-pandas.conftest] ignore_errors=True @@ -143,3 +147,228 @@ ignore_errors=True [mypy-pandas.tests.scalar.period.test_period] ignore_errors=True + +[mypy-pandas._version] +check_untyped_defs=False + +[mypy-pandas.core.arrays.boolean] +check_untyped_defs=False + +[mypy-pandas.core.arrays.categorical] +check_untyped_defs=False + +[mypy-pandas.core.arrays.integer] +check_untyped_defs=False + +[mypy-pandas.core.arrays.interval] +check_untyped_defs=False + +[mypy-pandas.core.arrays.sparse.array] +check_untyped_defs=False + +[mypy-pandas.core.base] +check_untyped_defs=False + +[mypy-pandas.core.computation.align] +check_untyped_defs=False + +[mypy-pandas.core.computation.eval] +check_untyped_defs=False + +[mypy-pandas.core.computation.expr] +check_untyped_defs=False + +[mypy-pandas.core.computation.expressions] +check_untyped_defs=False + +[mypy-pandas.core.computation.ops] +check_untyped_defs=False + +[mypy-pandas.core.computation.pytables] +check_untyped_defs=False + +[mypy-pandas.core.computation.scope] +check_untyped_defs=False + +[mypy-pandas.core.config_init] +check_untyped_defs=False + +[mypy-pandas.core.dtypes.cast] +check_untyped_defs=False + +[mypy-pandas.core.dtypes.generic] +check_untyped_defs=False + +[mypy-pandas.core.frame] +check_untyped_defs=False + +[mypy-pandas.core.generic] +check_untyped_defs=False + +[mypy-pandas.core.groupby.generic] +check_untyped_defs=False + +[mypy-pandas.core.groupby.grouper] +check_untyped_defs=False + +[mypy-pandas.core.groupby.ops] +check_untyped_defs=False + +[mypy-pandas.core.indexes.base] +check_untyped_defs=False + +[mypy-pandas.core.indexes.category] +check_untyped_defs=False + +[mypy-pandas.core.indexes.datetimelike] +check_untyped_defs=False + +[mypy-pandas.core.indexes.datetimes] +check_untyped_defs=False + +[mypy-pandas.core.indexes.interval] +check_untyped_defs=False + +[mypy-pandas.core.indexes.multi] +check_untyped_defs=False + +[mypy-pandas.core.indexes.timedeltas] +check_untyped_defs=False + +[mypy-pandas.core.indexing] +check_untyped_defs=False + +[mypy-pandas.core.internals.blocks] +check_untyped_defs=False + +[mypy-pandas.core.internals.concat] +check_untyped_defs=False + +[mypy-pandas.core.internals.construction] +check_untyped_defs=False + +[mypy-pandas.core.internals.managers] +check_untyped_defs=False + +[mypy-pandas.core.missing] +check_untyped_defs=False + +[mypy-pandas.core.nanops] +check_untyped_defs=False + +[mypy-pandas.core.ops.docstrings] +check_untyped_defs=False + +[mypy-pandas.core.resample] +check_untyped_defs=False + +[mypy-pandas.core.reshape.merge] +check_untyped_defs=False + +[mypy-pandas.core.reshape.reshape] +check_untyped_defs=False + +[mypy-pandas.core.series] +check_untyped_defs=False + +[mypy-pandas.core.strings] +check_untyped_defs=False + +[mypy-pandas.core.tools.datetimes] +check_untyped_defs=False + 
+[mypy-pandas.core.window.common] +check_untyped_defs=False + +[mypy-pandas.core.window.ewm] +check_untyped_defs=False + +[mypy-pandas.core.window.expanding] +check_untyped_defs=False + +[mypy-pandas.core.window.rolling] +check_untyped_defs=False + +[mypy-pandas.io.clipboard] +check_untyped_defs=False + +[mypy-pandas.io.excel._base] +check_untyped_defs=False + +[mypy-pandas.io.excel._openpyxl] +check_untyped_defs=False + +[mypy-pandas.io.excel._util] +check_untyped_defs=False + +[mypy-pandas.io.excel._xlwt] +check_untyped_defs=False + +[mypy-pandas.io.formats.console] +check_untyped_defs=False + +[mypy-pandas.io.formats.css] +check_untyped_defs=False + +[mypy-pandas.io.formats.excel] +check_untyped_defs=False + +[mypy-pandas.io.formats.format] +check_untyped_defs=False + +[mypy-pandas.io.formats.style] +check_untyped_defs=False + +[mypy-pandas.io.html] +check_untyped_defs=False + +[mypy-pandas.io.json._json] +check_untyped_defs=False + +[mypy-pandas.io.json._normalize] +check_untyped_defs=False + +[mypy-pandas.io.json._table_schema] +check_untyped_defs=False + +[mypy-pandas.io.parsers] +check_untyped_defs=False + +[mypy-pandas.io.pytables] +check_untyped_defs=False + +[mypy-pandas.io.sas.sas_xport] +check_untyped_defs=False + +[mypy-pandas.io.sas.sas7bdat] +check_untyped_defs=False + +[mypy-pandas.io.sas.sasreader] +check_untyped_defs=False + +[mypy-pandas.io.sql] +check_untyped_defs=False + +[mypy-pandas.io.stata] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.converter] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.core] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.misc] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.timeseries] +check_untyped_defs=False + +[mypy-pandas.tseries.holiday] +check_untyped_defs=False + +[mypy-pandas.tseries.offsets] +check_untyped_defs=False + +[mypy-pandas.util.testing] +check_untyped_defs=False From 53a0dfd41a65a33dd7b0963734b24c749212e625 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 19 Dec 2019 07:08:34 -0700 Subject: [PATCH 07/37] BUG: Fix infer_dtype_from_scalar to infer IntervalDtype (#30339) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/dtypes/cast.py | 5 +++- pandas/tests/dtypes/cast/test_infer_dtype.py | 29 +++++++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c072bfeff4a72..3d1ab08336be8 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -758,7 +758,7 @@ Interval ^^^^^^^^ - Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`) -- +- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`) Indexing ^^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b398a197a4bc0..1ab21f18f3bdc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -41,7 +41,7 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from .dtypes import DatetimeTZDtype, ExtensionDtype, PeriodDtype +from .dtypes import DatetimeTZDtype, ExtensionDtype, IntervalDtype, PeriodDtype from .generic import ( ABCDataFrame, ABCDatetimeArray, @@ -601,6 +601,9 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False): if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) val = val.ordinal + elif lib.is_interval(val): + subtype 
= infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] + dtype = IntervalDtype(subtype=subtype) return dtype, val diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index bf11b81af6f90..da2ef5260d070 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -10,7 +10,15 @@ ) from pandas.core.dtypes.common import is_dtype_equal -from pandas import Categorical, Period, Series, Timedelta, Timestamp, date_range +from pandas import ( + Categorical, + Interval, + Period, + Series, + Timedelta, + Timestamp, + date_range, +) import pandas.util.testing as tm @@ -107,6 +115,25 @@ def test_infer_from_scalar_tz(tz, pandas_dtype): assert val == exp_val +@pytest.mark.parametrize( + "left, right, subtype", + [ + (0, 1, "int64"), + (0.0, 1.0, "float64"), + (Timestamp(0), Timestamp(1), "datetime64[ns]"), + (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"), + (Timedelta(0), Timedelta(1), "timedelta64[ns]"), + ], +) +def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): + # GH 30337 + interval = Interval(left, right, closed) + result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) + expected_dtype = f"interval[{subtype}]" if pandas_dtype else np.object_ + assert result_dtype == expected_dtype + assert result_value == interval + + def test_infer_dtype_from_scalar_errors(): msg = "invalid ndarray passed to infer_dtype_from_scalar" From 5b25df2b578cdcb80c346a7d8b882e076dc67818 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 19 Dec 2019 11:09:20 -0600 Subject: [PATCH 08/37] API: Return BoolArray for string ops when backed by StringArray (#30239) * API: Return BoolArray for string ops --- doc/source/user_guide/text.rst | 9 ++++++++- pandas/core/strings.py | 27 ++++++++++++++++----------- pandas/tests/test_strings.py | 24 +++++++++++++++++++++++- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index ff0474dbecbb4..53c7a7437d55f 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from l. For ``StringDtype``, :ref:`string accessor methods` that return **numeric** output will always return a nullable integer dtype, rather than either int or float dtype, depending on the presence of NA values. + Methods returning **boolean** output will return a nullable boolean dtype. .. ipython:: python @@ -89,7 +90,13 @@ l. For ``StringDtype``, :ref:`string accessor methods` s.astype(object).str.count("a") s.astype(object).dropna().str.count("a") - When NA values are present, the output dtype is float64. + When NA values are present, the output dtype is float64. Similarly for + methods returning boolean values. + + .. ipython:: python + + s.str.isdigit() + s.str.match("a") 2. 
Some string methods, like :meth:`Series.str.decode` are not available on ``StringArray`` because ``StringArray`` only holds strings, not diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 24e2e674f6ae3..98075a02cd712 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,7 @@ from functools import wraps import re import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Type, Union import warnings import numpy as np @@ -142,7 +142,7 @@ def _map_stringarray( The value to use for missing values. By default, this is the original value (NA). dtype : Dtype - The result dtype to use. Specifying this aviods an intermediate + The result dtype to use. Specifying this avoids an intermediate object-dtype allocation. Returns @@ -152,14 +152,20 @@ def _map_stringarray( an ndarray. """ - from pandas.arrays import IntegerArray, StringArray + from pandas.arrays import IntegerArray, StringArray, BooleanArray mask = isna(arr) assert isinstance(arr, StringArray) arr = np.asarray(arr) - if is_integer_dtype(dtype): + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: Union[Type[IntegerArray], Type[BooleanArray]] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + na_value_is_na = isna(na_value) if na_value_is_na: na_value = 1 @@ -169,13 +175,13 @@ def _map_stringarray( mask.view("uint8"), convert=False, na_value=na_value, - dtype=np.dtype("int64"), + dtype=np.dtype(dtype), ) if not na_value_is_na: mask[:] = False - return IntegerArray(result, mask) + return constructor(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype @@ -183,7 +189,6 @@ def _map_stringarray( arr, func, mask.view("uint8"), convert=False, na_value=na_value ) return StringArray(result) - # TODO: BooleanArray else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. 
.encode returns bytes @@ -299,7 +304,7 @@ def str_count(arr, pat, flags=0): """ regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype=int) + return _na_map(f, arr, dtype="int64") def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): @@ -1365,7 +1370,7 @@ def str_find(arr, sub, start=0, end=None, side="left"): else: f = lambda x: getattr(x, method)(sub, start, end) - return _na_map(f, arr, dtype=int) + return _na_map(f, arr, dtype="int64") def str_index(arr, sub, start=0, end=None, side="left"): @@ -1385,7 +1390,7 @@ def str_index(arr, sub, start=0, end=None, side="left"): else: f = lambda x: getattr(x, method)(sub, start, end) - return _na_map(f, arr, dtype=int) + return _na_map(f, arr, dtype="int64") def str_pad(arr, width, side="left", fillchar=" "): @@ -3210,7 +3215,7 @@ def rindex(self, sub, start=0, end=None): len, docstring=_shared_docs["len"], forbidden_types=None, - dtype=int, + dtype="int64", returns_string=False, ) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 2e651c0b35deb..ae7ab6addc3fb 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1825,7 +1825,7 @@ def test_extractall_same_as_extract_subject_index(self): def test_empty_str_methods(self): empty_str = empty = Series(dtype=object) - empty_int = Series(dtype=int) + empty_int = Series(dtype="int64") empty_bool = Series(dtype=bool) empty_bytes = Series(dtype=object) @@ -3526,6 +3526,12 @@ def test_string_array(any_string_method): assert result.dtype == "string" result = result.astype(object) + elif expected.dtype == "object" and lib.is_bool_array( + expected.values, skipna=True + ): + assert result.dtype == "boolean" + result = result.astype(object) + elif expected.dtype == "float" and expected.isna().any(): assert result.dtype == "Int64" result = result.astype("float") @@ -3551,3 +3557,19 @@ def test_string_array_numeric_integer_array(method, expected): result = getattr(s.str, method)("a") expected = Series(expected, dtype="Int64") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ("isdigit", [False, None, True]), + ("isalpha", [True, None, False]), + ("isalnum", [True, None, True]), + ("isdigit", [False, None, True]), + ], +) +def test_string_array_boolean_array(method, expected): + s = Series(["a", None, "1"], dtype="string") + result = getattr(s.str, method)() + expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) From f8b9ce7f3f47aab06a943704639ae91cd281df17 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 09:40:38 -0800 Subject: [PATCH 09/37] REF: change parameter name fname -> path (#30338) --- doc/source/user_guide/io.rst | 4 ++-- doc/source/whatsnew/v1.0.0.rst | 3 ++- pandas/core/frame.py | 37 ++++++++++++++++++++++++---------- pandas/io/feather_format.py | 7 ++++--- pandas/io/parquet.py | 21 ++++++++++++------- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ae0f02312e1df..7f7b00ccfc167 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns .. 
ipython:: python df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) - df.to_parquet(fname='test', engine='pyarrow', + df.to_parquet(path='test', engine='pyarrow', partition_cols=['a'], compression=None) -The `fname` specifies the parent directory to which data will be saved. +The `path` specifies the parent directory to which data will be saved. The `partition_cols` are the column names by which the dataset will be partitioned. Columns are partitioned in the order they are given. The partition splits are determined by the unique values in the partition columns. diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3d1ab08336be8..a31db9712d5b8 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -502,7 +502,8 @@ Deprecations - :func:`pandas.json_normalize` is now exposed in the top-level namespace. Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). -- +- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) + .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 394d128164509..6f760e7ee4ca0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -37,7 +37,12 @@ from pandas._libs import algos as libalgos, lib from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature +from pandas.util._decorators import ( + Appender, + Substitution, + deprecate_kwarg, + rewrite_axis_style_signature, +) from pandas.util._validators import ( validate_axis_style_args, validate_bool_kwarg, @@ -1829,9 +1834,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) return cls(mgr) + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_stata( self, - fname, + path, convert_dates=None, write_index=True, byteorder=None, @@ -1849,11 +1855,16 @@ def to_stata( Parameters ---------- - fname : str, buffer or path object + path : str, buffer or path object String, path object (pathlib.Path or py._path.local.LocalPath) or object implementing a binary write() function. If using a buffer then the buffer will not be automatically closed after the file data has been written. + + .. versionchanged:: 1.0.0 + + Previously this was "fname" + convert_dates : dict Dictionary mapping columns containing datetime types to stata internal format to use when writing the dates. Options are 'tc', @@ -1927,7 +1938,7 @@ def to_stata( kwargs["convert_strl"] = convert_strl writer = statawriter( - fname, + path, self, convert_dates=convert_dates, byteorder=byteorder, @@ -1939,22 +1950,24 @@ def to_stata( ) writer.write_file() - def to_feather(self, fname): + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_feather(self, path): """ Write out the binary feather-format for DataFrames. Parameters ---------- - fname : str + path : str String file path. 
""" from pandas.io.feather_format import to_feather - to_feather(self, fname) + to_feather(self, path) + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") def to_parquet( self, - fname, + path, engine="auto", compression="snappy", index=None, @@ -1973,11 +1986,13 @@ def to_parquet( Parameters ---------- - fname : str + path : str File path or Root Directory path. Will be used as Root Directory path while writing a partitioned dataset. - .. versionchanged:: 0.24.0 + .. versionchanged:: 1.0.0 + + Previously this was "fname" engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option @@ -2034,7 +2049,7 @@ def to_parquet( to_parquet( self, - fname, + path, engine, compression=compression, index=index, diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 01118d7b7cd3e..41bdf97c1fe1f 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -34,10 +34,11 @@ def to_feather(df: DataFrame, path): # raise on anything else as we don't serialize the index if not isinstance(df.index, Int64Index): + typ = type(df.index) raise ValueError( - "feather does not support serializing {} " + f"feather does not support serializing {typ} " "for the index; you can .reset_index() " - "to make the index into column(s)".format(type(df.index)) + "to make the index into column(s)" ) if not df.index.equals(RangeIndex.from_range(range(len(df)))): @@ -63,7 +64,7 @@ def to_feather(df: DataFrame, path): feather.write_feather(df, path) -def read_feather(path, columns=None, use_threads=True): +def read_feather(path, columns=None, use_threads: bool = True): """ Load a feather-format object from the file path. diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 54e44ff33d079..f68347f042086 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -46,7 +46,7 @@ def get_engine(engine: str) -> "BaseImpl": class BaseImpl: @staticmethod - def validate_dataframe(df): + def validate_dataframe(df: DataFrame): if not isinstance(df, DataFrame): raise ValueError("to_parquet only supports IO with DataFrames") @@ -62,7 +62,7 @@ def validate_dataframe(df): if not valid_names: raise ValueError("Index level names must be strings") - def write(self, df, path, compression, **kwargs): + def write(self, df: DataFrame, path, compression, **kwargs): raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs): @@ -80,7 +80,7 @@ def __init__(self): def write( self, - df, + df: DataFrame, path, compression="snappy", coerce_timestamps="ms", @@ -137,7 +137,13 @@ def __init__(self): self.api = fastparquet def write( - self, df, path, compression="snappy", index=None, partition_cols=None, **kwargs + self, + df: DataFrame, + path, + compression="snappy", + index=None, + partition_cols=None, + **kwargs, ): self.validate_dataframe(df) # thriftpy/protocol/compact.py:339: @@ -196,9 +202,9 @@ def read(self, path, columns=None, **kwargs): def to_parquet( - df, + df: DataFrame, path, - engine="auto", + engine: str = "auto", compression="snappy", index: Optional[bool] = None, partition_cols=None, @@ -209,6 +215,7 @@ def to_parquet( Parameters ---------- + df : DataFrame path : str File path or Root Directory path. Will be used as Root Directory path while writing a partitioned dataset. @@ -255,7 +262,7 @@ def to_parquet( ) -def read_parquet(path, engine="auto", columns=None, **kwargs): +def read_parquet(path, engine: str = "auto", columns=None, **kwargs): """ Load a parquet object from the file path, returning a DataFrame. 
From 8cbfd06c2fba0e085bc1c3e792ea35ff943289c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 09:41:46 -0800 Subject: [PATCH 10/37] CLN: make lookups explicit instead of using globals (#30343) --- pandas/io/pytables.py | 63 +++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 07bf30e51a763..35ba2dca8cf8a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -176,22 +176,6 @@ class DuplicateWarning(Warning): # formats _FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"} -# storer class map -_STORER_MAP = { - "series": "SeriesFixed", - "frame": "FrameFixed", -} - -# table class map -_TABLE_MAP = { - "generic_table": "GenericTable", - "appendable_series": "AppendableSeriesTable", - "appendable_multiseries": "AppendableMultiSeriesTable", - "appendable_frame": "AppendableFrameTable", - "appendable_multiframe": "AppendableMultiFrameTable", - "worm": "WORMTable", -} - # axes map _AXES_MAP = {DataFrame: [0]} @@ -1553,12 +1537,17 @@ def _create_storer( self, group, format=None, - value=None, + value: Optional[FrameOrSeries] = None, encoding: str = "UTF-8", errors: str = "strict", ) -> Union["GenericFixed", "Table"]: """ return a suitable class to operate """ + cls: Union[Type["GenericFixed"], Type["Table"]] + + if value is not None and not isinstance(value, (Series, DataFrame)): + raise TypeError("value must be None, Series, or DataFrame") + def error(t): # return instead of raising so mypy can tell where we are raising return TypeError( @@ -1587,10 +1576,7 @@ def error(t): ) else: _TYPE_MAP = {Series: "series", DataFrame: "frame"} - try: - pt = _TYPE_MAP[type(value)] - except KeyError: - raise error("_TYPE_MAP") + pt = _TYPE_MAP[type(value)] # we are actually a table if format == "table": @@ -1598,12 +1584,12 @@ def error(t): # a storer node if "table" not in pt: + _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed} try: - return globals()[_STORER_MAP[pt]]( - self, group, encoding=encoding, errors=errors - ) + cls = _STORER_MAP[pt] except KeyError: raise error("_STORER_MAP") + return cls(self, group, encoding=encoding, errors=errors) # existing node (and must be a table) if tt is None: @@ -1625,29 +1611,22 @@ def error(t): tt = "appendable_frame" elif index.nlevels > 1: tt = "appendable_multiframe" - elif pt == "wide_table": - tt = "appendable_panel" - elif pt == "ndim_table": - tt = "appendable_ndim" - - else: - - # distinguish between a frame/table - tt = "legacy_panel" - try: - fields = group.table._v_attrs.fields - if len(fields) == 1 and fields[0] == "value": - tt = "legacy_frame" - except IndexError: - pass + _TABLE_MAP = { + "generic_table": GenericTable, + "appendable_series": AppendableSeriesTable, + "appendable_multiseries": AppendableMultiSeriesTable, + "appendable_frame": AppendableFrameTable, + "appendable_multiframe": AppendableMultiFrameTable, + "worm": WORMTable, + } try: - return globals()[_TABLE_MAP[tt]]( - self, group, encoding=encoding, errors=errors - ) + cls = _TABLE_MAP[tt] except KeyError: raise error("_TABLE_MAP") + return cls(self, group, encoding=encoding, errors=errors) + def _write_to_group( self, key: str, From 2bfd10cfccdef109d7e4638a91fb90b90505f8ad Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 09:42:19 -0800 Subject: [PATCH 11/37] REF: remove pytables Table.metadata (#30342) --- pandas/io/pytables.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git 
a/pandas/io/pytables.py b/pandas/io/pytables.py index 35ba2dca8cf8a..cda17eaa01881 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2012,9 +2012,6 @@ def maybe_set_size(self, min_itemsize=None): if min_itemsize is not None and self.typ.itemsize < min_itemsize: self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos) - def validate(self, handler, append): - self.validate_names() - def validate_names(self): pass @@ -3155,7 +3152,6 @@ def __init__( self.non_index_axes = [] self.values_axes = [] self.data_columns = [] - self.metadata = [] self.info = dict() self.nan_rep = None @@ -3355,7 +3351,6 @@ def set_attrs(self): self.attrs.encoding = self.encoding self.attrs.errors = self.errors self.attrs.levels = self.levels - self.attrs.metadata = self.metadata self.attrs.info = self.info def get_attrs(self): @@ -3369,7 +3364,6 @@ def get_attrs(self): self.levels = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] - self.metadata = getattr(self.attrs, "metadata", None) or [] def validate_version(self, where=None): """ are we trying to operate on an old version? """ @@ -3875,9 +3869,6 @@ def get_blk_items(mgr, blocks): # validate our min_itemsize self.validate_min_itemsize(min_itemsize) - # validate our metadata - self.metadata = [c.name for c in self.values_axes if c.metadata is not None] - # validate the axes if we have an existing table if validate: self.validate(existing_table) @@ -4106,7 +4097,7 @@ def write( ) for a in self.axes: - a.validate(self, append) + a.validate_names() if not self.is_exists: From 95e1a63dd3382db6663bc8a2b334b422b93dd7fe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 09:43:14 -0800 Subject: [PATCH 12/37] REF: pytables prepare to make _create_axes return a new object (#30344) --- pandas/io/pytables.py | 152 +++++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cda17eaa01881..d14b4ecf070a7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3205,13 +3205,13 @@ def validate(self, other): oax = ov[i] if sax != oax: raise ValueError( - f"invalid combinate of [{c}] on appending data " + f"invalid combination of [{c}] on appending data " f"[{sax}] vs current table [{oax}]" ) # should never get here raise Exception( - f"invalid combinate of [{c}] on appending data [{sv}] vs " + f"invalid combination of [{c}] on appending data [{sv}] vs " f"current table [{ov}]" ) @@ -3582,7 +3582,8 @@ def _read_axes( return results - def get_object(self, obj, transposed: bool): + @classmethod + def get_object(cls, obj, transposed: bool): """ return the data for this obj """ return obj @@ -3613,6 +3614,7 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): if isinstance(min_itemsize, dict): existing_data_columns = set(data_columns) + data_columns = list(data_columns) # ensure we do not modify data_columns.extend( [ k @@ -3624,10 +3626,10 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): # return valid columns in the order of our axis return [c for c in data_columns if c in axis_labels] - def create_axes( + def _create_axes( self, axes, - obj, + obj: DataFrame, validate: bool = True, nan_rep=None, data_columns=None, @@ -3652,16 +3654,16 @@ def create_axes( """ + if not isinstance(obj, DataFrame): + group = self.group._v_name + raise TypeError( + 
f"cannot properly create the storer for: [group->{group}," + f"value->{type(obj)}]" + ) + # set the default axes if needed if axes is None: - try: - axes = _AXES_MAP[type(obj)] - except KeyError: - group = self.group._v_name - raise TypeError( - f"cannot properly create the storer for: [group->{group}," - f"value->{type(obj)}]" - ) + axes = [0] # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -3669,15 +3671,14 @@ def create_axes( # do we have an existing table (if so, use its axes & data_columns) if self.infer_axes(): existing_table = self.copy() - existing_table.infer_axes() - axes = [a.axis for a in existing_table.index_axes] - data_columns = existing_table.data_columns - nan_rep = existing_table.nan_rep - self.encoding = existing_table.encoding - self.errors = existing_table.errors - self.info = copy.copy(existing_table.info) + axes = [a.axis for a in self.index_axes] + data_columns = self.data_columns + nan_rep = self.nan_rep + new_info = self.info + # TODO: do we always have validate=True here? else: existing_table = None + new_info = self.info assert self.ndim == 2 # with next check, we must have len(axes) == 1 # currently support on ndim-1 axes @@ -3693,7 +3694,7 @@ def create_axes( if nan_rep is None: nan_rep = "nan" - # We construct the non-index-axis first, since that alters self.info + # We construct the non-index-axis first, since that alters new_info idx = [x for x in [0, 1] if x not in axes][0] a = obj.axes[idx] @@ -3711,7 +3712,7 @@ def create_axes( append_axis = exist_axis # the non_index_axes info - info = self.info.setdefault(idx, {}) + info = new_info.setdefault(idx, {}) info["names"] = list(a.names) info["type"] = type(a).__name__ @@ -3720,14 +3721,14 @@ def create_axes( # Now we can construct our new index axis idx = axes[0] a = obj.axes[idx] - name = obj._AXIS_NAMES[idx] - new_index = _convert_index(name, a, self.encoding, self.errors) + index_name = obj._AXIS_NAMES[idx] + new_index = _convert_index(index_name, a, self.encoding, self.errors) new_index.axis = idx # Because we are always 2D, there is only one new_index, so # we know it will have pos=0 new_index.set_pos(0) - new_index.update_info(self.info) + new_index.update_info(new_info) new_index.maybe_set_size(min_itemsize) # check for column conflicts new_index_axes = [new_index] @@ -3745,47 +3746,13 @@ def get_blk_items(mgr, blocks): transposed = new_index.axis == 1 # figure out data_columns and get out blocks - block_obj = self.get_object(obj, transposed)._consolidate() - blocks = block_obj._data.blocks - blk_items = get_blk_items(block_obj._data, blocks) - data_columns = self.validate_data_columns( data_columns, min_itemsize, new_non_index_axes ) - if len(data_columns): - axis, axis_labels = new_non_index_axes[0] - new_labels = Index(axis_labels).difference(Index(data_columns)) - mgr = block_obj.reindex(new_labels, axis=axis)._data - - blocks = list(mgr.blocks) - blk_items = get_blk_items(mgr, blocks) - for c in data_columns: - mgr = block_obj.reindex([c], axis=axis)._data - blocks.extend(mgr.blocks) - blk_items.extend(get_blk_items(mgr, mgr.blocks)) - - # reorder the blocks in the same order as the existing_table if we can - if existing_table is not None: - by_items = { - tuple(b_items.tolist()): (b, b_items) - for b, b_items in zip(blocks, blk_items) - } - new_blocks = [] - new_blk_items = [] - for ea in existing_table.values_axes: - items = tuple(ea.values) - try: - b, b_items = by_items.pop(items) - new_blocks.append(b) - new_blk_items.append(b_items) - except (IndexError, KeyError): - 
jitems = ",".join(pprint_thing(item) for item in items) - raise ValueError( - f"cannot match existing table structure for [{jitems}] " - "on appending data" - ) - blocks = new_blocks - blk_items = new_blk_items + block_obj = self.get_object(obj, transposed)._consolidate() + blocks, blk_items = self._get_blocks_and_items( + block_obj, existing_table, new_non_index_axes, data_columns + ) # add my values vaxes = [] @@ -3854,7 +3821,7 @@ def get_blk_items(mgr, blocks): dtype=dtype_name, data=data, ) - col.update_info(self.info) + col.update_info(new_info) vaxes.append(col) @@ -3873,6 +3840,55 @@ def get_blk_items(mgr, blocks): if validate: self.validate(existing_table) + @staticmethod + def _get_blocks_and_items( + block_obj, existing_table, new_non_index_axes, data_columns + ): + # Helper to clarify non-state-altering parts of _create_axes + + def get_blk_items(mgr, blocks): + return [mgr.items.take(blk.mgr_locs) for blk in blocks] + + blocks = block_obj._data.blocks + blk_items = get_blk_items(block_obj._data, blocks) + + if len(data_columns): + axis, axis_labels = new_non_index_axes[0] + new_labels = Index(axis_labels).difference(Index(data_columns)) + mgr = block_obj.reindex(new_labels, axis=axis)._data + + blocks = list(mgr.blocks) + blk_items = get_blk_items(mgr, blocks) + for c in data_columns: + mgr = block_obj.reindex([c], axis=axis)._data + blocks.extend(mgr.blocks) + blk_items.extend(get_blk_items(mgr, mgr.blocks)) + + # reorder the blocks in the same order as the existing_table if we can + if existing_table is not None: + by_items = { + tuple(b_items.tolist()): (b, b_items) + for b, b_items in zip(blocks, blk_items) + } + new_blocks = [] + new_blk_items = [] + for ea in existing_table.values_axes: + items = tuple(ea.values) + try: + b, b_items = by_items.pop(items) + new_blocks.append(b) + new_blk_items.append(b_items) + except (IndexError, KeyError): + jitems = ",".join(pprint_thing(item) for item in items) + raise ValueError( + f"cannot match existing table structure for [{jitems}] " + "on appending data" + ) + blocks = new_blocks + blk_items = new_blk_items + + return blocks, blk_items + def process_axes(self, obj, selection: "Selection", columns=None): """ process axes filters """ @@ -4087,7 +4103,7 @@ def write( self._handle.remove_node(self.group, "table") # create the axes - self.create_axes( + self._create_axes( axes=axes, obj=obj, validate=append, @@ -4306,7 +4322,8 @@ class AppendableFrameTable(AppendableTable): def is_transposed(self) -> bool: return self.index_axes[0].axis == 1 - def get_object(self, obj, transposed: bool): + @classmethod + def get_object(cls, obj, transposed: bool): """ these are written transposed """ if transposed: obj = obj.T @@ -4405,7 +4422,8 @@ class AppendableSeriesTable(AppendableFrameTable): def is_transposed(self) -> bool: return False - def get_object(self, obj, transposed: bool): + @classmethod + def get_object(cls, obj, transposed: bool): return obj def write(self, obj, data_columns=None, **kwargs): From e66a2c79163d649385ea4b3fd87c7de0bf13bdab Mon Sep 17 00:00:00 2001 From: hasnain2808 Date: Fri, 20 Dec 2019 03:07:19 +0530 Subject: [PATCH 13/37] CLN: format replaced with f-strings #29547 (#30355) --- pandas/core/indexers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 433bca940c028..209f889e809c3 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -144,9 +144,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: if len(indices): 
min_idx = indices.min() if min_idx < -1: - msg = "'indices' contains values less than allowed ({} < {})".format( - min_idx, -1 - ) + msg = f"'indices' contains values less than allowed ({min_idx} < -1)" raise ValueError(msg) max_idx = indices.max() From 011a667e2855c2d9d85b10e4764929613575179e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alp=20Ar=C4=B1bal?= Date: Fri, 20 Dec 2019 03:28:05 +0100 Subject: [PATCH 14/37] replace str.format with f-string (#30363) --- pandas/core/frame.py | 12 +++---- pandas/core/generic.py | 79 +++++++++++++++++------------------------- pandas/core/strings.py | 18 +++++----- 3 files changed, 44 insertions(+), 65 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f760e7ee4ca0..766437dbad8f8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1745,7 +1745,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None): rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], dtype=[('I', 'S2'), ('A', '>> index_dtypes = ">> index_dtypes = f">> df.to_records(index_dtypes=index_dtypes) rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], dtype=[('I', 'S1'), ('A', ' None: ) msg = ( - "'{key}' is both {level_article} {level_type} level and " - "{label_article} {label_type} label, which is ambiguous." - ).format( - key=key, - level_article=level_article, - level_type=level_type, - label_article=label_article, - label_type=label_type, + f"'{key}' is both {level_article} {level_type} level and " + f"{label_article} {label_type} label, which is ambiguous." ) raise ValueError(msg) @@ -1731,12 +1718,8 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: label_axis_name = "column" if axis == 0 else "index" raise ValueError( ( - "The {label_axis_name} label '{key}' " - "is not unique.{multi_message}" - ).format( - key=key, - label_axis_name=label_axis_name, - multi_message=multi_message, + f"The {label_axis_name} label '{key}' " + f"is not unique.{multi_message}" ) ) @@ -1780,8 +1763,8 @@ def _drop_labels_or_levels(self, keys, axis: int = 0): raise ValueError( ( "The following keys are not valid labels or " - "levels for axis {axis}: {invalid_keys}" - ).format(axis=axis, invalid_keys=invalid_keys) + f"levels for axis {axis}: {invalid_keys}" + ) ) # Compute levels and labels to drop @@ -1998,7 +1981,7 @@ def __setstate__(self, state): def __repr__(self) -> str: # string representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) - prepr = "[%s]" % ",".join(map(pprint_thing, self)) + prepr = f"[{','.join(map(pprint_thing, self))}]" return f"{type(self).__name__}({prepr})" def _repr_latex_(self): @@ -3946,13 +3929,13 @@ def _drop_axis(self, labels, axis, level=None, errors: str = "raise"): # GH 18561 MultiIndex.drop should raise if label is absent if errors == "raise" and indexer.all(): - raise KeyError("{} not found in axis".format(labels)) + raise KeyError(f"{labels} not found in axis") else: indexer = ~axis.isin(labels) # Check if label doesn't exist along axis labels_missing = (axis.get_indexer_for(labels) == -1).any() if errors == "raise" and labels_missing: - raise KeyError("{} not found in axis".format(labels)) + raise KeyError(f"{labels} not found in axis") slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer @@ -4476,7 +4459,7 @@ def reindex(self, *args, **kwargs): if kwargs: raise TypeError( "reindex() got an unexpected keyword " - 'argument "{0}"'.format(list(kwargs.keys())[0]) + f'argument "{list(kwargs.keys())[0]}"' ) 
self._consolidate_inplace() @@ -5997,7 +5980,7 @@ def fillna( raise TypeError( '"value" parameter must be a scalar, dict ' "or Series, but you passed a " - '"{0}"'.format(type(value).__name__) + f'"{type(value).__name__}"' ) new_data = self._data.fillna( @@ -6781,9 +6764,9 @@ def interpolate( if method not in methods and not is_numeric_or_datetime: raise ValueError( "Index column must be numeric or datetime type when " - "using {method} method other than linear. " + f"using {method} method other than linear. " "Try setting a numeric or datetime index column before " - "interpolating.".format(method=method) + "interpolating." ) if isna(index).any(): @@ -9205,7 +9188,7 @@ def _tz_convert(ax, tz): ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): - raise ValueError("The level {0} is not valid".format(level)) + raise ValueError(f"The level {level} is not valid") ax = _tz_convert(ax, tz) result = self._constructor(self._data, copy=copy) @@ -9375,7 +9358,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): - raise ValueError("The level {0} is not valid".format(level)) + raise ValueError(f"The level {level} is not valid") ax = _tz_localize(ax, tz, ambiguous, nonexistent) result = self._constructor(self._data, copy=copy) @@ -10357,8 +10340,8 @@ def last_valid_index(self): def _doc_parms(cls): """Return a tuple of the doc parms.""" - axis_descr = "{%s}" % ", ".join( - "{0} ({1})".format(a, i) for i, a in enumerate(cls._AXIS_ORDERS) + axis_descr = ( + f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}" ) name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" name2 = cls.__name__ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 98075a02cd712..0ef39a685f1ce 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -113,7 +113,7 @@ def cat_safe(list_of_columns: List, sep: str): raise TypeError( "Concatenation requires list-likes containing only " "strings (or missing values). 
Offending values found in " - "column {}".format(dtype) + f"column {dtype}" ) from None return result @@ -1355,8 +1355,8 @@ def str_find(arr, sub, start=0, end=None, side="left"): """ if not isinstance(sub, str): - msg = "expected a string object, not {0}" - raise TypeError(msg.format(type(sub).__name__)) + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) if side == "left": method = "find" @@ -1375,8 +1375,8 @@ def str_find(arr, sub, start=0, end=None, side="left"): def str_index(arr, sub, start=0, end=None, side="left"): if not isinstance(sub, str): - msg = "expected a string object, not {0}" - raise TypeError(msg.format(type(sub).__name__)) + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) if side == "left": method = "index" @@ -1447,15 +1447,15 @@ def str_pad(arr, width, side="left", fillchar=" "): dtype: object """ if not isinstance(fillchar, str): - msg = "fillchar must be a character, not {0}" - raise TypeError(msg.format(type(fillchar).__name__)) + msg = f"fillchar must be a character, not {type(fillchar).__name__}" + raise TypeError(msg) if len(fillchar) != 1: raise TypeError("fillchar must be a character, not str") if not is_integer(width): - msg = "width must be of integer type, not {0}" - raise TypeError(msg.format(type(width).__name__)) + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) if side == "left": f = lambda x: x.rjust(width, fillchar) From c521a4ebeacc9483074d160786155fae08087b29 Mon Sep 17 00:00:00 2001 From: Souvik Mandal Date: Fri, 20 Dec 2019 08:10:12 +0530 Subject: [PATCH 15/37] DOC: "Next" link from user_guide/io.rst goes to read_sql_table API page #30332 (#30348) --- doc/source/user_guide/io.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7f7b00ccfc167..c32b009948fda 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -4828,7 +4828,6 @@ See also some :ref:`cookbook examples ` for some advanced strategi The key functions are: .. autosummary:: - :toctree: ../reference/api/ read_sql_table read_sql_query From b4343efa33c9456694bcfd6dee7bf5941288966b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 19 Dec 2019 21:39:04 -0800 Subject: [PATCH 16/37] CI: troubleshoot codecov (#30070) --- ci/run_tests.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index b91cfb3bed8cc..4c5dbabc81950 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -37,7 +37,8 @@ echo $PYTEST_CMD sh -c "$PYTEST_CMD" if [[ "$COVERAGE" && $? 
== 0 && "$TRAVIS_BRANCH" == "master" ]]; then + SHA=`git rev-parse HEAD` echo "uploading coverage" - echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME" - bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME + echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME -C $SHA" + bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME -C `git rev-parse HEAD` fi From 66038e9430b2e5a4c78166ee370af84dd1eedbad Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 03:56:24 -0800 Subject: [PATCH 17/37] BUG+TST: non-optimized apply_index and empty DatetimeIndex (#30336) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/arrays/datetimes.py | 7 ++-- pandas/tests/tseries/offsets/test_offsets.py | 44 ++++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a31db9712d5b8..a15d5b319fc82 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -712,7 +712,7 @@ Datetimelike - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) -- +- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e41f2a840d151..10669b09cefec 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -794,9 +794,7 @@ def _add_offset(self, offset): values = self.tz_localize(None) else: values = self - result = offset.apply_index(values) - if self.tz is not None: - result = result.tz_localize(self.tz) + result = offset.apply_index(values).tz_localize(self.tz) except NotImplementedError: warnings.warn( @@ -804,6 +802,9 @@ def _add_offset(self, offset): PerformanceWarning, ) result = self.astype("O") + offset + if len(self) == 0: + # _from_sequence won't be able to infer self.tz + return type(self)._from_sequence(result).tz_localize(self.tz) return type(self)._from_sequence(result, freq="infer") diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 458d69c1d3216..6f628bf86829a 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -20,6 +20,7 @@ from pandas._libs.tslibs.offsets import ApplyTypeError import pandas.compat as compat from pandas.compat.numpy import np_datetime64_compat +from pandas.errors import PerformanceWarning from pandas.core.indexes.datetimes import DatetimeIndex, _to_M8, date_range from pandas.core.series import Series @@ -43,7 +44,10 @@ CBMonthBegin, CBMonthEnd, CDay, + CustomBusinessDay, CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, DateOffset, Day, Easter, @@ -607,6 +611,46 @@ def test_add(self, offset_types, tz_naive_fixture): assert isinstance(result, Timestamp) assert result == expected_localize + def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): + # GH#12724, GH#30336 + offset_s = 
self._get_offset(offset_types) + + dti = DatetimeIndex([], tz=tz_naive_fixture) + + warn = None + if isinstance( + offset_s, + ( + Easter, + WeekOfMonth, + LastWeekOfMonth, + CustomBusinessDay, + BusinessHour, + CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, + FY5253, + FY5253Quarter, + ), + ): + # We don't have an optimized apply_index + warn = PerformanceWarning + + with tm.assert_produces_warning(warn): + result = dti + offset_s + tm.assert_index_equal(result, dti) + with tm.assert_produces_warning(warn): + result = offset_s + dti + tm.assert_index_equal(result, dti) + + dta = dti._data + with tm.assert_produces_warning(warn): + result = dta + offset_s + tm.assert_equal(result, dta) + with tm.assert_produces_warning(warn): + result = offset_s + dta + tm.assert_equal(result, dta) + def test_pickle_v0_15_2(self, datapath): offsets = { "DateOffset": DateOffset(years=1), From a9e2566bde43dbbab55e5940326a81c0afa60d33 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 04:27:13 -0800 Subject: [PATCH 18/37] REF: define NA_VALUES in libparsers (#30373) --- pandas/_libs/parsers.pyx | 21 ++++++++++++++++++++- pandas/io/common.py | 23 ----------------------- pandas/io/excel/_base.py | 4 ++-- pandas/io/parsers.py | 12 ++++++------ pandas/tests/io/parser/test_na_values.py | 6 +++--- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index bb1493280dfd2..1b566af7a5437 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1367,7 +1367,26 @@ def _ensure_encoded(list lst): # common NA values # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES)) +STR_NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A N/A", + "#N/A", + "N/A", + "n/a", + "NA", + "#NA", + "NULL", + "null", + "NaN", + "-NaN", + "nan", + "-nan", + "", +} +_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) def _maybe_upcast(arr): diff --git a/pandas/io/common.py b/pandas/io/common.py index a01011cd7d4e4..0159716248b11 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -47,29 +47,6 @@ lzma = _import_lzma() -# common NA values -# no longer excluding inf representations -# '1.#INF','-1.#INF', '1.#INF000000', -_NA_VALUES = { - "-1.#IND", - "1.#QNAN", - "1.#IND", - "-1.#QNAN", - "#N/A N/A", - "#N/A", - "N/A", - "n/a", - "NA", - "#NA", - "NULL", - "null", - "NaN", - "-NaN", - "nan", - "-nan", - "", -} - _VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS.discard("") diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 81d3d46f78bdb..8368142c3633a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -6,6 +6,7 @@ from pandas._config import config +from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import EmptyDataError from pandas.util._decorators import Appender @@ -14,7 +15,6 @@ from pandas.core.frame import DataFrame from pandas.io.common import ( - _NA_VALUES, _is_url, _stringify_path, _validate_header_arg, @@ -124,7 +124,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. 
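A minimal usage sketch of the relocated constant (not part of the patch itself): after this change the canonical NA-marker set is imported from the Cython parsers module rather than pandas.io.common, exactly as the excel hunk above and the parsers.py hunks below do. The extra markers "missing" and "?" and the inline CSV are made-up illustration only.

    # Sketch only: STR_NA_VALUES now lives in pandas._libs.parsers
    from io import StringIO

    import pandas as pd
    from pandas._libs.parsers import STR_NA_VALUES

    # extend the default NA markers with project-specific ones
    custom_na = STR_NA_VALUES | {"missing", "?"}
    df = pd.read_csv(StringIO("a,b\n1,missing\n?,2\n"), na_values=list(custom_na))
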
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a887a537a2201..32d812637a067 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,6 +17,7 @@ import pandas._libs.lib as lib import pandas._libs.ops as libops import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES from pandas._libs.tslibs import parsing from pandas.errors import ( AbstractMethodError, @@ -60,7 +61,6 @@ from pandas.core.tools import datetimes as tools from pandas.io.common import ( - _NA_VALUES, BaseIterator, UnicodeReader, UTF8Recoder, @@ -195,7 +195,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. @@ -3398,7 +3398,7 @@ def _clean_na_values(na_values, keep_default_na=True): if na_values is None: if keep_default_na: - na_values = _NA_VALUES + na_values = STR_NA_VALUES else: na_values = set() na_fvalues = set() @@ -3415,7 +3415,7 @@ def _clean_na_values(na_values, keep_default_na=True): v = [v] if keep_default_na: - v = set(v) | _NA_VALUES + v = set(v) | STR_NA_VALUES na_values[k] = v na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} @@ -3424,7 +3424,7 @@ def _clean_na_values(na_values, keep_default_na=True): na_values = [na_values] na_values = _stringify_na_values(na_values) if keep_default_na: - na_values = na_values | _NA_VALUES + na_values = na_values | STR_NA_VALUES na_fvalues = _floatify_na_values(na_values) @@ -3575,7 +3575,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na): return na_values[col], na_fvalues[col] else: if keep_default_na: - return _NA_VALUES, set() + return STR_NA_VALUES, set() return set(), set() else: diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index f52c6b8858fd3..353d309a84823 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -7,11 +7,11 @@ import numpy as np import pytest +from pandas._libs.parsers import STR_NA_VALUES + from pandas import DataFrame, Index, MultiIndex import pandas.util.testing as tm -import pandas.io.common as com - def test_string_nas(all_parsers): parser = all_parsers @@ -99,7 +99,7 @@ def test_default_na_values(all_parsers): "#N/A N/A", "", } - assert _NA_VALUES == com._NA_VALUES + assert _NA_VALUES == STR_NA_VALUES parser = all_parsers nv = len(_NA_VALUES) From eadaa403be60df0d2852ac6ee2c5ed1ca78aeb7a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 20 Dec 2019 14:59:05 +0000 Subject: [PATCH 19/37] [CLN] remove now-unnecessary td.skip_if_no(pathlib) (#30376) --- pandas/tests/io/pytables/test_store.py | 4 +--- pandas/tests/io/sas/test_sas7bdat.py | 4 +--- pandas/tests/io/test_common.py | 11 ++--------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index d9a76fe97f813..3cd9d9cdd67d2 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -3,6 +3,7 @@ from distutils.version import LooseVersion from io import BytesIO import os +from pathlib import Path import re from warnings import catch_warnings, simplefilter @@ -4594,12 
+4595,9 @@ def test_read_nokey_empty(self, setup_path): with pytest.raises(ValueError): read_hdf(path) - @td.skip_if_no("pathlib") def test_read_from_pathlib_path(self, setup_path): # GH11773 - from pathlib import Path - expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index e37561c865c7a..49af18d2935ef 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -1,5 +1,6 @@ import io import os +from pathlib import Path import numpy as np import pytest @@ -68,10 +69,7 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() - @td.skip_if_no("pathlib") def test_path_pathlib(self): - from pathlib import Path - for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a15eac89ecedb..d2633ea0676cd 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -4,6 +4,7 @@ from io import StringIO import mmap import os +from pathlib import Path import pytest @@ -27,14 +28,7 @@ def __fspath__(self): # Functions that consume a string path and return a string or path-like object -path_types = [str, CustomFSPath] - -try: - from pathlib import Path - - path_types.append(Path) -except ImportError: - pass +path_types = [str, CustomFSPath, Path] try: from py.path import local as LocalPath @@ -73,7 +67,6 @@ def test_expand_user_normal_path(self): assert expanded_name == filename assert os.path.expanduser(filename) == expanded_name - @td.skip_if_no("pathlib") def test_stringify_path_pathlib(self): rel_path = icom._stringify_path(Path(".")) assert rel_path == "." From 1be80ea4de67f10c50f169457e079185bf28d806 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 07:33:02 -0800 Subject: [PATCH 20/37] REF: directory for method-specific series/frame tests (#30362) --- pandas/tests/frame/methods/__init__.py | 7 +++++++ pandas/tests/frame/{ => methods}/test_asof.py | 0 pandas/tests/frame/{ => methods}/test_explode.py | 0 pandas/tests/frame/{ => methods}/test_quantile.py | 0 pandas/tests/frame/{ => methods}/test_rank.py | 0 pandas/tests/frame/{ => methods}/test_replace.py | 0 pandas/tests/series/methods/__init__.py | 7 +++++++ pandas/tests/series/{ => methods}/test_asof.py | 0 pandas/tests/series/{ => methods}/test_explode.py | 0 pandas/tests/series/{ => methods}/test_quantile.py | 0 pandas/tests/series/{ => methods}/test_rank.py | 0 pandas/tests/series/{ => methods}/test_replace.py | 0 12 files changed, 14 insertions(+) create mode 100644 pandas/tests/frame/methods/__init__.py rename pandas/tests/frame/{ => methods}/test_asof.py (100%) rename pandas/tests/frame/{ => methods}/test_explode.py (100%) rename pandas/tests/frame/{ => methods}/test_quantile.py (100%) rename pandas/tests/frame/{ => methods}/test_rank.py (100%) rename pandas/tests/frame/{ => methods}/test_replace.py (100%) create mode 100644 pandas/tests/series/methods/__init__.py rename pandas/tests/series/{ => methods}/test_asof.py (100%) rename pandas/tests/series/{ => methods}/test_explode.py (100%) rename pandas/tests/series/{ => methods}/test_quantile.py (100%) rename pandas/tests/series/{ => methods}/test_rank.py (100%) rename pandas/tests/series/{ => methods}/test_replace.py (100%) diff --git a/pandas/tests/frame/methods/__init__.py b/pandas/tests/frame/methods/__init__.py new file mode 100644 index 0000000000000..245594bfdc9e7 --- 
/dev/null +++ b/pandas/tests/frame/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) DataFrame methods + +Ideally these files/tests should correspond 1-to-1 with tests.series.methods + +These may also present opportunities for sharing/de-duplicating test code. +""" diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/methods/test_asof.py similarity index 100% rename from pandas/tests/frame/test_asof.py rename to pandas/tests/frame/methods/test_asof.py diff --git a/pandas/tests/frame/test_explode.py b/pandas/tests/frame/methods/test_explode.py similarity index 100% rename from pandas/tests/frame/test_explode.py rename to pandas/tests/frame/methods/test_explode.py diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py similarity index 100% rename from pandas/tests/frame/test_quantile.py rename to pandas/tests/frame/methods/test_quantile.py diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/methods/test_rank.py similarity index 100% rename from pandas/tests/frame/test_rank.py rename to pandas/tests/frame/methods/test_rank.py diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/methods/test_replace.py similarity index 100% rename from pandas/tests/frame/test_replace.py rename to pandas/tests/frame/methods/test_replace.py diff --git a/pandas/tests/series/methods/__init__.py b/pandas/tests/series/methods/__init__.py new file mode 100644 index 0000000000000..bcb0d30f405e2 --- /dev/null +++ b/pandas/tests/series/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) Series methods + +Ideally these files/tests should correspond 1-to-1 with tests.frame.methods + +These may also present opportunities for sharing/de-duplicating test code. 
+""" diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/methods/test_asof.py similarity index 100% rename from pandas/tests/series/test_asof.py rename to pandas/tests/series/methods/test_asof.py diff --git a/pandas/tests/series/test_explode.py b/pandas/tests/series/methods/test_explode.py similarity index 100% rename from pandas/tests/series/test_explode.py rename to pandas/tests/series/methods/test_explode.py diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/methods/test_quantile.py similarity index 100% rename from pandas/tests/series/test_quantile.py rename to pandas/tests/series/methods/test_quantile.py diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/methods/test_rank.py similarity index 100% rename from pandas/tests/series/test_rank.py rename to pandas/tests/series/methods/test_rank.py diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/methods/test_replace.py similarity index 100% rename from pandas/tests/series/test_replace.py rename to pandas/tests/series/methods/test_replace.py From a6b047a87ff6bcd3377b1addd0cddbce3564c9dc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 07:33:54 -0800 Subject: [PATCH 21/37] REF: refactor cumulative op tests from test_analytics (#30358) --- pandas/tests/frame/test_analytics.py | 106 ------------------ pandas/tests/frame/test_cumulative.py | 120 +++++++++++++++++++++ pandas/tests/series/test_analytics.py | 111 ------------------- pandas/tests/series/test_cumulative.py | 142 +++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 217 deletions(-) create mode 100644 pandas/tests/frame/test_cumulative.py create mode 100644 pandas/tests/series/test_cumulative.py diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index cef389a6c4167..0653c9dc5f91b 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1495,112 +1495,6 @@ def test_sum_bools(self): bools = isna(df) assert bools.sum(axis=1)[0] == 10 - # --------------------------------------------------------------------- - # Cumulative Reductions - cumsum, cummax, ... 
- - def test_cumsum_corner(self): - dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) - # ?(wesm) - result = dm.cumsum() # noqa - - def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumsum = datetime_frame.cumsum() - expected = datetime_frame.apply(Series.cumsum) - tm.assert_frame_equal(cumsum, expected) - - # axis = 1 - cumsum = datetime_frame.cumsum(axis=1) - expected = datetime_frame.apply(Series.cumsum, axis=1) - tm.assert_frame_equal(cumsum, expected) - - # works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - result = df.cumsum() # noqa - - # fix issue - cumsum_xs = datetime_frame.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(datetime_frame) - - def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumprod = datetime_frame.cumprod() - expected = datetime_frame.apply(Series.cumprod) - tm.assert_frame_equal(cumprod, expected) - - # axis = 1 - cumprod = datetime_frame.cumprod(axis=1) - expected = datetime_frame.apply(Series.cumprod, axis=1) - tm.assert_frame_equal(cumprod, expected) - - # fix issue - cumprod_xs = datetime_frame.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(datetime_frame) - - # ints - df = datetime_frame.fillna(0).astype(int) - df.cumprod(0) - df.cumprod(1) - - # ints32 - df = datetime_frame.fillna(0).astype(np.int32) - df.cumprod(0) - df.cumprod(1) - - def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummin = datetime_frame.cummin() - expected = datetime_frame.apply(Series.cummin) - tm.assert_frame_equal(cummin, expected) - - # axis = 1 - cummin = datetime_frame.cummin(axis=1) - expected = datetime_frame.apply(Series.cummin, axis=1) - tm.assert_frame_equal(cummin, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - result = df.cummin() # noqa - - # fix issue - cummin_xs = datetime_frame.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(datetime_frame) - - def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummax = datetime_frame.cummax() - expected = datetime_frame.apply(Series.cummax) - tm.assert_frame_equal(cummax, expected) - - # axis = 1 - cummax = datetime_frame.cummax(axis=1) - expected = datetime_frame.apply(Series.cummax, axis=1) - tm.assert_frame_equal(cummax, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - result = df.cummax() # noqa - - # fix issue - cummax_xs = datetime_frame.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(datetime_frame) - # --------------------------------------------------------------------- # Miscellanea diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py new file mode 100644 index 0000000000000..ad2cbff888b2e --- /dev/null +++ b/pandas/tests/frame/test_cumulative.py @@ -0,0 +1,120 @@ +""" +Tests for DataFrame cumulative operations + +See also +-------- +tests.series.test_cumulative +""" + +import numpy as np + +from pandas import DataFrame, Series +import pandas.util.testing as tm + + +class TestDataFrameCumulativeOps: + # 
--------------------------------------------------------------------- + # Cumulative Operations - cumsum, cummax, ... + + def test_cumsum_corner(self): + dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) + # TODO(wesm): do something with this? + result = dm.cumsum() # noqa + + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) + tm.assert_frame_equal(cumsum, expected) + + # axis = 1 + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) + tm.assert_frame_equal(cumsum, expected) + + # works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cumsum() + + # fix issue + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) + + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) + tm.assert_frame_equal(cumprod, expected) + + # axis = 1 + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) + tm.assert_frame_equal(cumprod, expected) + + # fix issue + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) + tm.assert_frame_equal(cummin, expected) + + # axis = 1 + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) + tm.assert_frame_equal(cummin, expected) + + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cummin() + + # fix issue + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) + + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) + tm.assert_frame_equal(cummax, expected) + + # axis = 1 + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) + tm.assert_frame_equal(cummax, expected) + + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + df.cummax() + + # fix issue + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0eb4e8a6cfdf3..148c376eba752 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -4,7 +4,6 @@ import numpy as np import pytest -from pandas.compat.numpy import _np_version_under1p18 import pandas.util._test_decorators as td import pandas as pd @@ -125,116 +124,6 @@ def test_argsort_stable(self): with pytest.raises(AssertionError, match=msg): 
tm.assert_numpy_array_equal(qindexer, mindexer) - def test_cumsum(self, datetime_series): - self._check_accum_op("cumsum", datetime_series) - - def test_cumprod(self, datetime_series): - self._check_accum_op("cumprod", datetime_series) - - def test_cummin(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummin().values, - np.minimum.accumulate(np.array(datetime_series)), - ) - ts = datetime_series.copy() - ts[::2] = np.NaN - result = ts.cummin()[1::2] - expected = np.minimum.accumulate(ts.dropna()) - - tm.assert_series_equal(result, expected) - - def test_cummax(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummax().values, - np.maximum.accumulate(np.array(datetime_series)), - ) - ts = datetime_series.copy() - ts[::2] = np.NaN - result = ts.cummax()[1::2] - expected = np.maximum.accumulate(ts.dropna()) - - tm.assert_series_equal(result, expected) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummin_datetime64(self): - s = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) - ) - - expected = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]) - ) - result = s.cummin(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] - ) - ) - result = s.cummin(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummax_datetime64(self): - s = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) - ) - - expected = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]) - ) - result = s.cummax(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] - ) - ) - result = s.cummax(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummin_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) - ) - result = s.cummin(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) - ) - result = s.cummin(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummax_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) - ) - result = s.cummax(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) - ) - result = s.cummax(skipna=False) - tm.assert_series_equal(expected, result) - def test_np_diff(self): pytest.skip("skipping due to Series no longer being an ndarray") diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py new file mode 100644 index 
0000000000000..a31cc9d968f3a --- /dev/null +++ b/pandas/tests/series/test_cumulative.py @@ -0,0 +1,142 @@ +""" +Tests for Series cumulative operations. + +See also +-------- +tests.frame.test_cumulative +""" +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p18 + +import pandas as pd +import pandas.util.testing as tm + + +def _check_accum_op(name, series, check_dtype=True): + func = getattr(np, name) + tm.assert_numpy_array_equal( + func(series).values, func(np.array(series)), check_dtype=check_dtype, + ) + + # with missing values + ts = series.copy() + ts[::2] = np.NaN + + result = func(ts)[1::2] + expected = func(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) + + +class TestSeriesCumulativeOps: + def test_cumsum(self, datetime_series): + _check_accum_op("cumsum", datetime_series) + + def test_cumprod(self, datetime_series): + _check_accum_op("cumprod", datetime_series) + + def test_cummin(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummin().values, + np.minimum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummin()[1::2] + expected = np.minimum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + def test_cummax(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummax().values, + np.maximum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummax()[1::2] + expected = np.maximum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummin_datetime64(self): + s = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) + ) + + expected = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] + ) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummax_datetime64(self): + s = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) + ) + + expected = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] + ) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummin_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 
changed min/max behavior for NaT" + ) + def test_cummax_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) From 929684938bdbc952a9ed0f171eb6b7dbd095bf31 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 20 Dec 2019 07:47:15 -0800 Subject: [PATCH 22/37] Cleaned up Tempita refs and Cython import (#30330) --- setup.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index 45f3af3d5c374..c6b078dae280a 100755 --- a/setup.py +++ b/setup.py @@ -63,24 +63,15 @@ def is_platform_mac(): from distutils.extension import Extension # noqa: E402 isort:skip from distutils.command.build import build # noqa: E402 isort:skip -try: - if not _CYTHON_INSTALLED: - raise ImportError("No supported version of Cython installed.") +if _CYTHON_INSTALLED: from Cython.Distutils.old_build_ext import old_build_ext as _build_ext cython = True -except ImportError: + from Cython import Tempita as tempita +else: from distutils.command.build_ext import build_ext as _build_ext cython = False -else: - try: - try: - from Cython import Tempita as tempita - except ImportError: - import tempita - except ImportError: - raise ImportError("Building pandas requires Tempita: pip install Tempita") _pxi_dep_template = { From 6efc237980c39a61d297823910475a694258d5b3 Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Fri, 20 Dec 2019 18:49:52 +0300 Subject: [PATCH 23/37] CLN: Old string formatting: .format() -> f"" (#30328) --- pandas/core/indexing.py | 36 +++++---------- pandas/core/internals/blocks.py | 64 +++++++++++---------------- pandas/core/internals/construction.py | 13 +++--- pandas/core/internals/managers.py | 28 ++++++------ pandas/core/resample.py | 26 +++++------ 5 files changed, 68 insertions(+), 99 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b31973de5bca0..b86293e78a80d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -232,7 +232,7 @@ def _has_valid_tuple(self, key: Tuple): except ValueError: raise ValueError( "Location based indexing can only have " - "[{types}] types".format(types=self._valid_types) + f"[{self._valid_types}] types" ) def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: @@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: bool """ if isinstance(indexer, dict): - raise IndexError("{0} cannot enlarge its target object".format(self.name)) + raise IndexError(f"{self.name} cannot enlarge its target object") else: if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) @@ -300,13 +300,10 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: elif is_integer(i): if i >= len(ax): raise IndexError( - "{name} cannot enlarge its target " - "object".format(name=self.name) + f"{self.name} cannot enlarge its target object" ) elif isinstance(i, dict): - raise IndexError( - "{name} cannot enlarge its target object".format(name=self.name) - ) + raise IndexError(f"{self.name} cannot enlarge its target object") return True @@ -1166,17 +1163,14 @@ def _validate_read_indexer( if missing: if missing == len(indexer): - raise KeyError( - "None of [{key}] are 
in the [{axis}]".format( - key=key, axis=self.obj._get_axis_name(axis) - ) - ) + axis_name = self.obj._get_axis_name(axis) + raise KeyError(f"None of [{key}] are in the [{axis_name}]") # We (temporarily) allow for some missing keys with .loc, except in # some cases (e.g. setting) in which "raise_missing" will be False if not (self.name == "loc" and not raise_missing): not_found = list(set(key) - set(ax)) - raise KeyError("{} not in index".format(not_found)) + raise KeyError(f"{not_found} not in index") # we skip the warning on Categorical/Interval # as this check is actually done (check for @@ -1905,18 +1899,13 @@ def _validate_key(self, key, axis: int): # check that the key has a numeric dtype if not is_numeric_dtype(arr.dtype): - raise IndexError( - ".iloc requires numeric indexers, got {arr}".format(arr=arr) - ) + raise IndexError(f".iloc requires numeric indexers, got {arr}") # check that the key does not exceed the maximum size of the index if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): raise IndexError("positional indexers are out-of-bounds") else: - raise ValueError( - "Can only index by location with " - "a [{types}]".format(types=self._valid_types) - ) + raise ValueError(f"Can only index by location with a [{self._valid_types}]") def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) @@ -2063,10 +2052,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): self._validate_key(obj, axis) return obj except ValueError: - raise ValueError( - "Can only index by location with " - "a [{types}]".format(types=self._valid_types) - ) + raise ValueError(f"Can only index by location with a [{self._valid_types}]") class _ScalarAccessIndexer(_NDFrameIndexerBase): @@ -2327,7 +2313,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: # GH26658 if len(result) != len(index): raise IndexError( - "Item wrong length {} instead of {}.".format(len(result), len(index)) + f"Item wrong length {len(result)} instead of {len(index)}." ) return result diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 610a39a05148b..eb5b5181d894d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -115,8 +115,8 @@ def __init__(self, values, placement, ndim=None): if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values): raise ValueError( - "Wrong number of items passed {val}, placement implies " - "{mgr}".format(val=len(self.values), mgr=len(self.mgr_locs)) + f"Wrong number of items passed {len(self.values)}, " + f"placement implies {len(self.mgr_locs)}" ) def _check_ndim(self, values, ndim): @@ -144,9 +144,10 @@ def _check_ndim(self, values, ndim): ndim = values.ndim if self._validate_ndim and values.ndim != ndim: - msg = "Wrong number of dimensions. values.ndim != ndim [{} != {}]" - raise ValueError(msg.format(values.ndim, ndim)) - + raise ValueError( + "Wrong number of dimensions. 
" + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) return ndim @property @@ -184,7 +185,7 @@ def is_categorical_astype(self, dtype): if dtype is Categorical or dtype is CategoricalDtype: # this is a pd.Categorical, but is not # a valid type for astypeing - raise TypeError("invalid type {0} for astype".format(dtype)) + raise TypeError(f"invalid type {dtype} for astype") elif is_categorical_dtype(dtype): return True @@ -264,18 +265,14 @@ def __repr__(self) -> str: name = type(self).__name__ if self._is_single_block: - result = "{name}: {len} dtype: {dtype}".format( - name=name, len=len(self), dtype=self.dtype - ) + result = f"{name}: {len(self)} dtype: {self.dtype}" else: shape = " x ".join(pprint_thing(s) for s in self.shape) - result = "{name}: {index}, {shape}, dtype: {dtype}".format( - name=name, - index=pprint_thing(self.mgr_locs.indexer), - shape=shape, - dtype=self.dtype, + result = ( + f"{name}: {pprint_thing(self.mgr_locs.indexer)}, " + f"{shape}, dtype: {self.dtype}" ) return result @@ -329,7 +326,7 @@ def ftype(self): dtype = self.dtype.subtype else: dtype = self.dtype - return "{dtype}:{ftype}".format(dtype=dtype, ftype=self._ftype) + return f"{dtype}:{self._ftype}" def merge(self, other): return _merge_blocks([self, other]) @@ -544,15 +541,15 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): if errors not in errors_legal_values: invalid_arg = ( - "Expected value of kwarg 'errors' to be one of {}. " - "Supplied value is '{}'".format(list(errors_legal_values), errors) + "Expected value of kwarg 'errors' to be one of " + f"{list(errors_legal_values)}. Supplied value is '{errors}'" ) raise ValueError(invalid_arg) if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): msg = ( - "Expected an instance of {}, but got the class instead. " - "Try instantiating 'dtype'.".format(dtype.__name__) + f"Expected an instance of {dtype.__name__}, " + "but got the class instead. Try instantiating 'dtype'." 
) raise TypeError(msg) @@ -613,15 +610,9 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): if newb.is_numeric and self.is_numeric: if newb.shape != self.shape: raise TypeError( - "cannot set astype for copy = [{copy}] for dtype " - "({dtype} [{shape}]) to different shape " - "({newb_dtype} [{newb_shape}])".format( - copy=copy, - dtype=self.dtype.name, - shape=self.shape, - newb_dtype=newb.dtype.name, - newb_shape=newb.shape, - ) + f"cannot set astype for copy = [{copy}] for dtype " + f"({self.dtype.name} [{self.shape}]) to different shape " + f"({newb.dtype.name} [{newb.shape}])" ) return newb @@ -658,7 +649,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): if not self.is_object and not quoting: itemsize = writers.word_len(na_rep) - values = values.astype(" str: output = type(self).__name__ for i, ax in enumerate(self.axes): if i == 0: - output += "\nItems: {ax}".format(ax=ax) + output += f"\nItems: {ax}" else: - output += "\nAxis {i}: {ax}".format(i=i, ax=ax) + output += f"\nAxis {i}: {ax}" for block in self.blocks: - output += "\n{block}".format(block=pprint_thing(block)) + output += f"\n{pprint_thing(block)}" return output def _verify_integrity(self): @@ -336,8 +336,8 @@ def _verify_integrity(self): if len(self.items) != tot_items: raise AssertionError( "Number of manager items must equal union of " - "block items\n# manager items: {0}, # " - "tot_items: {1}".format(len(self.items), tot_items) + f"block items\n# manager items: {len(self.items)}, # " + f"tot_items: {tot_items}" ) def apply(self, f: str, filter=None, **kwargs): @@ -1140,7 +1140,7 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): """ if not allow_duplicates and item in self.items: # Should this be a different kind of error?? - raise ValueError("cannot insert {}, already exists".format(item)) + raise ValueError(f"cannot insert {item}, already exists") if not isinstance(loc, int): raise TypeError("loc must be int") @@ -1661,9 +1661,7 @@ def construction_error(tot_items, block_shape, axes, e=None): raise e if block_shape[0] == 0: raise ValueError("Empty data passed with indices specified.") - raise ValueError( - "Shape of passed values is {0}, indices imply {1}".format(passed, implied) - ) + raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") # ----------------------------------------------------------------------- @@ -1899,10 +1897,10 @@ def _compare_or_regex_search(a, b, regex=False): type_names = [type(a).__name__, type(b).__name__] if is_a_array: - type_names[0] = "ndarray(dtype={dtype})".format(dtype=a.dtype) + type_names[0] = f"ndarray(dtype={a.dtype})" if is_b_array: - type_names[1] = "ndarray(dtype={dtype})".format(dtype=b.dtype) + type_names[1] = f"ndarray(dtype={b.dtype})" raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 67f06ea7bea6a..2294c846e81c7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -90,13 +90,11 @@ def __str__(self) -> str: Provide a nice str repr of our rolling object. 
""" attrs = ( - "{k}={v}".format(k=k, v=getattr(self.groupby, k)) + f"{k}={getattr(self.groupby, k)}" for k in self._attributes if getattr(self.groupby, k, None) is not None ) - return "{klass} [{attrs}]".format( - klass=type(self).__name__, attrs=", ".join(attrs) - ) + return f"{type(self).__name__} [{', '.join(attrs)}]" def __getattr__(self, attr): if attr in self._internal_names_set: @@ -1188,8 +1186,8 @@ def _downsample(self, how, **kwargs): return self.asfreq() raise IncompatibleFrequency( - "Frequency {} cannot be resampled to {}, as they are not " - "sub or super periods".format(ax.freq, self.freq) + f"Frequency {ax.freq} cannot be resampled to {self.freq}, " + "as they are not sub or super periods" ) def _upsample(self, method, limit=None, fill_value=None): @@ -1333,11 +1331,11 @@ def __init__( # Check for correctness of the keyword arguments which would # otherwise silently use the default if misspelled if label not in {None, "left", "right"}: - raise ValueError("Unsupported value {} for `label`".format(label)) + raise ValueError(f"Unsupported value {label} for `label`") if closed not in {None, "left", "right"}: - raise ValueError("Unsupported value {} for `closed`".format(closed)) + raise ValueError(f"Unsupported value {closed} for `closed`") if convention not in {None, "start", "end", "e", "s"}: - raise ValueError("Unsupported value {} for `convention`".format(convention)) + raise ValueError(f"Unsupported value {convention} for `convention`") freq = to_offset(freq) @@ -1407,7 +1405,7 @@ def _get_resampler(self, obj, kind=None): raise TypeError( "Only valid with DatetimeIndex, " "TimedeltaIndex or PeriodIndex, " - "but got an instance of '{typ}'".format(typ=type(ax).__name__) + f"but got an instance of '{type(ax).__name__}'" ) def _get_grouper(self, obj, validate=True): @@ -1420,7 +1418,7 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError( "axis must be a DatetimeIndex, but got " - "an instance of {typ}".format(typ=type(ax).__name__) + f"an instance of {type(ax).__name__}" ) if len(ax) == 0: @@ -1496,7 +1494,7 @@ def _get_time_delta_bins(self, ax): if not isinstance(ax, TimedeltaIndex): raise TypeError( "axis must be a TimedeltaIndex, but got " - "an instance of {typ}".format(typ=type(ax).__name__) + f"an instance of {type(ax).__name__}" ) if not len(ax): @@ -1521,7 +1519,7 @@ def _get_time_period_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError( "axis must be a DatetimeIndex, but got " - "an instance of {typ}".format(typ=type(ax).__name__) + f"an instance of {type(ax).__name__}" ) freq = self.freq @@ -1543,7 +1541,7 @@ def _get_period_bins(self, ax): if not isinstance(ax, PeriodIndex): raise TypeError( "axis must be a PeriodIndex, but got " - "an instance of {typ}".format(typ=type(ax).__name__) + f"an instance of {type(ax).__name__}" ) memb = ax.asfreq(self.freq, how=self.convention) From 0df88587529e7695558eb7ee970d984bb5d315a8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 09:44:52 -0800 Subject: [PATCH 24/37] de-privatize io.common functions (#30368) --- pandas/io/common.py | 34 ++++++++++++++--------------- pandas/io/excel/_base.py | 14 ++++++------ pandas/io/feather_format.py | 6 ++--- pandas/io/formats/csvs.py | 14 ++++++------ pandas/io/formats/excel.py | 4 ++-- pandas/io/formats/format.py | 4 ++-- pandas/io/formats/html.py | 4 ++-- pandas/io/html.py | 10 ++++----- pandas/io/json/_json.py | 14 ++++++------ pandas/io/parsers.py | 14 ++++++------ pandas/io/pickle.py | 10 ++++----- 
pandas/io/pytables.py | 8 +++---- pandas/io/sas/sasreader.py | 4 ++-- pandas/io/stata.py | 6 ++--- pandas/tests/frame/test_to_csv.py | 4 ++-- pandas/tests/io/test_common.py | 18 +++++++-------- pandas/tests/io/test_compression.py | 6 ++--- pandas/tests/series/test_io.py | 4 ++-- 18 files changed, 89 insertions(+), 89 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 0159716248b11..7151a34cd37de 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -64,7 +64,7 @@ def __next__(self): raise AbstractMethodError(self) -def _is_url(url) -> bool: +def is_url(url) -> bool: """ Check to see if a URL has a valid protocol. @@ -102,7 +102,7 @@ def _expand_user( return filepath_or_buffer -def _validate_header_arg(header) -> None: +def validate_header_arg(header) -> None: if isinstance(header, bool): raise TypeError( "Passing a bool to header is invalid. " @@ -112,7 +112,7 @@ def _validate_header_arg(header) -> None: ) -def _stringify_path( +def stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. @@ -193,9 +193,9 @@ def get_filepath_or_buffer( compression, str, should_close, bool) """ - filepath_or_buffer = _stringify_path(filepath_or_buffer) + filepath_or_buffer = stringify_path(filepath_or_buffer) - if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): + if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): req = urlopen(filepath_or_buffer) content_encoding = req.headers.get("Content-Encoding", None) if content_encoding == "gzip": @@ -250,7 +250,7 @@ def file_path_to_url(path: str) -> str: _compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"} -def _get_compression_method( +def get_compression_method( compression: Optional[Union[str, Mapping[str, str]]] ) -> Tuple[Optional[str], Dict[str, str]]: """ @@ -283,7 +283,7 @@ def _get_compression_method( return compression, compression_args -def _infer_compression( +def infer_compression( filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] ) -> Optional[str]: """ @@ -317,7 +317,7 @@ def _infer_compression( # Infer compression if compression == "infer": # Convert all path types (e.g. pathlib.Path) to strings - filepath_or_buffer = _stringify_path(filepath_or_buffer) + filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): # Cannot infer compression of a buffer, assume no compression return None @@ -338,7 +338,7 @@ def _infer_compression( raise ValueError(msg) -def _get_handle( +def get_handle( path_or_buf, mode: str, encoding=None, @@ -396,12 +396,12 @@ def _get_handle( f = path_or_buf # Convert pathlib.Path/py.path.local or string - path_or_buf = _stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) is_path = isinstance(path_or_buf, str) - compression, compression_args = _get_compression_method(compression) + compression, compression_args = get_compression_method(compression) if is_path: - compression = _infer_compression(path_or_buf, compression) + compression = infer_compression(path_or_buf, compression) if compression: @@ -421,7 +421,7 @@ def _get_handle( # ZIP Compression elif compression == "zip": - zf = BytesZipFile(path_or_buf, mode, **compression_args) + zf = _BytesZipFile(path_or_buf, mode, **compression_args) # Ensure the container is closed as well. 
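Since these helpers are now importable under their public names from pandas.io.common, a rough usage sketch of the compression ones; the calls mirror the signatures shown in the hunks above, and the file names and dict keys are only illustrative:

from pandas.io.common import get_compression_method, infer_compression

# compression="infer" resolves the method from the file extension.
assert infer_compression("data.csv.gz", compression="infer") == "gzip"
assert infer_compression("data.csv", compression="infer") is None

# A mapping is split into the method plus extra keyword arguments for the writer.
method, extra = get_compression_method({"method": "zip", "archive_name": "out.csv"})
assert method == "zip" and extra == {"archive_name": "out.csv"}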
handles.append(zf) if zf.mode == "w": @@ -472,7 +472,7 @@ def _get_handle( if memory_map and hasattr(f, "fileno"): try: - wrapped = MMapWrapper(f) + wrapped = _MMapWrapper(f) f.close() f = wrapped except Exception: @@ -485,7 +485,7 @@ def _get_handle( return f, handles -class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore +class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore """ Wrapper for standard library class ZipFile and allow the returned file-like handle to accept byte strings via `write` method. @@ -518,7 +518,7 @@ def closed(self): return self.fp is None -class MMapWrapper(BaseIterator): +class _MMapWrapper(BaseIterator): """ Wrapper for the Python's mmap class so that it can be properly read in by Python's csv.reader class. @@ -537,7 +537,7 @@ def __init__(self, f: IO): def __getattr__(self, name: str): return getattr(self.mmap, name) - def __iter__(self) -> "MMapWrapper": + def __iter__(self) -> "_MMapWrapper": return self def __next__(self) -> str: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 8368142c3633a..553334407d12e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -15,11 +15,11 @@ from pandas.core.frame import DataFrame from pandas.io.common import ( - _is_url, - _stringify_path, - _validate_header_arg, get_filepath_or_buffer, + is_url, + stringify_path, urlopen, + validate_header_arg, ) from pandas.io.excel._util import ( _fill_mi_header, @@ -339,7 +339,7 @@ def read_excel( class _BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer): # If filepath_or_buffer is a url, load the data into a BytesIO - if _is_url(filepath_or_buffer): + if is_url(filepath_or_buffer): filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) @@ -408,7 +408,7 @@ def parse( **kwds, ): - _validate_header_arg(header) + validate_header_arg(header) ret_dict = False @@ -708,7 +708,7 @@ def __init__( self.mode = mode def __fspath__(self): - return _stringify_path(self.path) + return stringify_path(self.path) def _get_sheet_name(self, sheet_name): if sheet_name is None: @@ -808,7 +808,7 @@ def __init__(self, io, engine=None): # could be a str, ExcelFile, Book, etc. 
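Similarly, a tiny illustration of validate_header_arg, assuming it keeps the behaviour defined earlier in this patch (booleans rejected, ints and None accepted):

from pandas.io.common import validate_header_arg

validate_header_arg(0)      # a row number is fine
validate_header_arg(None)   # "no header" is fine
try:
    validate_header_arg(True)
except TypeError as err:
    print(err)              # booleans are rejected to avoid header=True ambiguity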
self.io = io # Always a string - self._io = _stringify_path(io) + self._io = stringify_path(io) self._reader = self._engines[engine](self._io) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 41bdf97c1fe1f..eb05004d9137c 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -4,7 +4,7 @@ from pandas import DataFrame, Int64Index, RangeIndex -from pandas.io.common import _stringify_path +from pandas.io.common import stringify_path def to_feather(df: DataFrame, path): @@ -20,7 +20,7 @@ def to_feather(df: DataFrame, path): import_optional_dependency("pyarrow") from pyarrow import feather - path = _stringify_path(path) + path = stringify_path(path) if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") @@ -98,6 +98,6 @@ def read_feather(path, columns=None, use_threads: bool = True): import_optional_dependency("pyarrow") from pyarrow import feather - path = _stringify_path(path) + path = stringify_path(path) return feather.read_feather(path, columns=columns, use_threads=bool(use_threads)) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index ae5d1d30bcddb..c0071028a8ef4 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -23,10 +23,10 @@ from pandas.io.common import ( UnicodeWriter, - _get_compression_method, - _get_handle, - _infer_compression, + get_compression_method, get_filepath_or_buffer, + get_handle, + infer_compression, ) @@ -61,7 +61,7 @@ def __init__( path_or_buf = StringIO() # Extract compression mode as given, if dict - compression, self.compression_args = _get_compression_method(compression) + compression, self.compression_args = get_compression_method(compression) self.path_or_buf, _, _, _ = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, mode=mode @@ -78,7 +78,7 @@ def __init__( if encoding is None: encoding = "utf-8" self.encoding = encoding - self.compression = _infer_compression(self.path_or_buf, compression) + self.compression = infer_compression(self.path_or_buf, compression) if quoting is None: quoting = csvlib.QUOTE_MINIMAL @@ -179,7 +179,7 @@ def save(self): f = self.path_or_buf close = False else: - f, handles = _get_handle( + f, handles = get_handle( self.path_or_buf, self.mode, encoding=self.encoding, @@ -212,7 +212,7 @@ def save(self): else: compression = dict(self.compression_args, method=self.compression) - f, handles = _get_handle( + f, handles = get_handle( self.path_or_buf, self.mode, encoding=self.encoding, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 2f7a80eea1554..18340bc702378 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -15,6 +15,7 @@ from pandas import Index import pandas.core.common as com +from pandas.io.common import stringify_path from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.format import get_level_lengths from pandas.io.formats.printing import pprint_thing @@ -711,7 +712,6 @@ def write( and ``io.excel.xlsm.writer``. 
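stringify_path, which the writers above now import by its public name, just normalises path-like input to a plain string. A small sketch mirroring the test_common.py cases later in this patch:

import os
from pathlib import Path

from pandas.io.common import stringify_path

assert stringify_path(Path("foo") / "bar") == os.path.join("foo", "bar")
assert stringify_path("foo/bar.csv") == "foo/bar.csv"  # plain strings pass through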
""" from pandas.io.excel import ExcelWriter - from pandas.io.common import _stringify_path num_rows, num_cols = self.df.shape if num_rows > self.max_rows or num_cols > self.max_cols: @@ -724,7 +724,7 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - writer = ExcelWriter(_stringify_path(writer), engine=engine) + writer = ExcelWriter(stringify_path(writer), engine=engine) need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 124bd31c8d308..b0574925cf1b1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -72,7 +72,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.io.common import _stringify_path +from pandas.io.common import stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing if TYPE_CHECKING: @@ -482,7 +482,7 @@ def get_buffer( objects, otherwise yield buf unchanged. """ if buf is not None: - buf = _stringify_path(buf) + buf = stringify_path(buf) else: buf = StringIO() diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 0c6b0c1a5810b..3a3347a5c86ea 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -12,7 +12,7 @@ from pandas import option_context -from pandas.io.common import _is_url +from pandas.io.common import is_url from pandas.io.formats.format import ( DataFrameFormatter, TableFormatter, @@ -147,7 +147,7 @@ def _write_cell( rs = pprint_thing(s, escape_chars=esc).strip() - if self.render_links and _is_url(rs): + if self.render_links and is_url(rs): rs_unescaped = pprint_thing(s, escape_chars={}).strip() start_tag += ''.format(url=rs_unescaped) end_a = "" diff --git a/pandas/io/html.py b/pandas/io/html.py index 3521bad375aa6..eafcca0e85bb3 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -16,7 +16,7 @@ from pandas.core.construction import create_series_with_explicit_dtype -from pandas.io.common import _is_url, _validate_header_arg, urlopen +from pandas.io.common import is_url, urlopen, validate_header_arg from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -117,7 +117,7 @@ def _read(obj): ------- raw_text : str """ - if _is_url(obj): + if is_url(obj): with urlopen(obj) as url: text = url.read() elif hasattr(obj, "read"): @@ -705,7 +705,7 @@ def _build_doc(self): parser = HTMLParser(recover=True, encoding=self.encoding) try: - if _is_url(self.io): + if is_url(self.io): with urlopen(self.io) as f: r = parse(f, parser=parser) else: @@ -717,7 +717,7 @@ def _build_doc(self): pass except (UnicodeDecodeError, IOError) as e: # if the input is a blob of html goop - if not _is_url(self.io): + if not is_url(self.io): r = fromstring(self.io, parser=parser) try: @@ -1076,7 +1076,7 @@ def read_html( "cannot skip rows starting from the end of the " "data (you passed a negative value)" ) - _validate_header_arg(header) + validate_header_arg(header) return _parse( flavor=flavor, io=io, diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 6cb811bb97755..14a272e15bc29 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -20,10 +20,10 @@ from pandas.io.common import ( BaseIterator, - _get_handle, - _infer_compression, - _stringify_path, get_filepath_or_buffer, + get_handle, + infer_compression, + stringify_path, ) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer @@ -58,7 +58,7 @@ def 
to_json( "'index=False' is only valid when 'orient' is " "'split' or 'table'" ) - path_or_buf = _stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -91,7 +91,7 @@ def to_json( s = convert_to_line_delimits(s) if isinstance(path_or_buf, str): - fh, handles = _get_handle(path_or_buf, "w", compression=compression) + fh, handles = get_handle(path_or_buf, "w", compression=compression) try: fh.write(s) finally: @@ -584,7 +584,7 @@ def read_json( if encoding is None: encoding = "utf-8" - compression = _infer_compression(path_or_buf, compression) + compression = infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression ) @@ -704,7 +704,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): pass if exists or self.compression is not None: - data, _ = _get_handle( + data, _ = get_handle( filepath_or_buffer, "r", encoding=self.encoding, diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 32d812637a067..cc3d2bd12ca35 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -35,6 +35,7 @@ is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, + is_file_like, is_float, is_integer, is_integer_dtype, @@ -64,11 +65,10 @@ BaseIterator, UnicodeReader, UTF8Recoder, - _get_handle, - _infer_compression, - _validate_header_arg, get_filepath_or_buffer, - is_file_like, + get_handle, + infer_compression, + validate_header_arg, ) from pandas.io.date_converters import generic_parser @@ -426,7 +426,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): kwds["encoding"] = encoding compression = kwds.get("compression", "infer") - compression = _infer_compression(filepath_or_buffer, compression) + compression = infer_compression(filepath_or_buffer, compression) # TODO: get_filepath_or_buffer could return # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] @@ -1050,7 +1050,7 @@ def _clean_options(self, options, engine): na_values = options["na_values"] skiprows = options["skiprows"] - _validate_header_arg(options["header"]) + validate_header_arg(options["header"]) depr_warning = "" @@ -2283,7 +2283,7 @@ def __init__(self, f, **kwds): self.comment = kwds["comment"] self._comment_lines = [] - f, handles = _get_handle( + f, handles = get_handle( f, "r", encoding=self.encoding, diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 0a0ccedd78f00..6ce52da21b4e8 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,7 +4,7 @@ from pandas.compat import pickle_compat as pc -from pandas.io.common import _get_handle, _stringify_path +from pandas.io.common import get_handle, stringify_path def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL): @@ -63,8 +63,8 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL): >>> import os >>> os.remove("./dummy.pkl") """ - path = _stringify_path(path) - f, fh = _get_handle(path, "wb", compression=compression, is_text=False) + path = stringify_path(path) + f, fh = get_handle(path, "wb", compression=compression, is_text=False) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL try: @@ -134,8 +134,8 @@ def read_pickle(path, compression="infer"): >>> import os >>> os.remove("./dummy.pkl") """ - path = _stringify_path(path) - f, fh = _get_handle(path, "rb", compression=compression, is_text=False) + path = stringify_path(path) + f, fh = 
get_handle(path, "rb", compression=compression, is_text=False) # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d14b4ecf070a7..8e0ab27c1fa85 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -63,7 +63,7 @@ from pandas.core.computation.pytables import PyTablesExpr, maybe_expression from pandas.core.indexes.api import ensure_index -from pandas.io.common import _stringify_path +from pandas.io.common import stringify_path from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: @@ -274,7 +274,7 @@ def to_hdf( encoding=encoding, ) - path_or_buf = _stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) if isinstance(path_or_buf, str): with HDFStore( path_or_buf, mode=mode, complevel=complevel, complib=complib @@ -379,7 +379,7 @@ def read_hdf( store = path_or_buf auto_close = False else: - path_or_buf = _stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) if not isinstance(path_or_buf, str): raise NotImplementedError( "Support for generic buffers has not been implemented." @@ -525,7 +525,7 @@ def __init__( if complib is None and complevel is not None: complib = tables.filters.default_complib - self._path = _stringify_path(path) + self._path = stringify_path(path) if mode is None: mode = "a" self._mode = mode diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 6bd3532d538c7..56ebb583bc2f9 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -1,7 +1,7 @@ """ Read SAS sas7bdat or xport files. """ -from pandas.io.common import _stringify_path +from pandas.io.common import stringify_path def read_sas( @@ -52,7 +52,7 @@ def read_sas( "than a string name, you must specify " "a format string" ) - filepath_or_buffer = _stringify_path(filepath_or_buffer) + filepath_or_buffer = stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): raise ValueError(buffer_error_msg) fname = filepath_or_buffer.lower() diff --git a/pandas/io/stata.py b/pandas/io/stata.py index dbe64e4c0f06d..84dd302fc293f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -44,7 +44,7 @@ from pandas.core.frame import DataFrame from pandas.core.series import Series -from pandas.io.common import BaseIterator, _stringify_path, get_filepath_or_buffer +from pandas.io.common import BaseIterator, get_filepath_or_buffer, stringify_path _version_error = ( "Version of given Stata file is not 104, 105, 108, " @@ -1051,7 +1051,7 @@ def __init__( self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) - path_or_buf = _stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) if isinstance(path_or_buf, str): path_or_buf, encoding, _, should_close = get_filepath_or_buffer(path_or_buf) @@ -2112,7 +2112,7 @@ def __init__( if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) - self._fname = _stringify_path(fname) + self._fname = stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} self._converted_names = {} diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index ad058faff96e7..5c39dcc1a7659 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -21,7 +21,7 @@ import pandas.core.common as com import pandas.util.testing as tm -from pandas.io.common import _get_handle +from pandas.io.common import get_handle 
MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] MIXED_INT_DTYPES = [ @@ -1065,7 +1065,7 @@ def test_to_csv_compression(self, df, encoding, compression): tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = _get_handle( + f, _handles = get_handle( filename, "w", compression=compression, encoding=encoding ) with f: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d2633ea0676cd..f4efbbeda6311 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -68,9 +68,9 @@ def test_expand_user_normal_path(self): assert os.path.expanduser(filename) == expanded_name def test_stringify_path_pathlib(self): - rel_path = icom._stringify_path(Path(".")) + rel_path = icom.stringify_path(Path(".")) assert rel_path == "." - redundant_path = icom._stringify_path(Path("foo//bar")) + redundant_path = icom.stringify_path(Path("foo//bar")) assert redundant_path == os.path.join("foo", "bar") @td.skip_if_no("py.path") @@ -78,11 +78,11 @@ def test_stringify_path_localpath(self): path = os.path.join("foo", "bar") abs_path = os.path.abspath(path) lpath = LocalPath(path) - assert icom._stringify_path(lpath) == abs_path + assert icom.stringify_path(lpath) == abs_path def test_stringify_path_fspath(self): p = CustomFSPath("foo/bar.csv") - result = icom._stringify_path(p) + result = icom.stringify_path(p) assert result == "foo/bar.csv" @pytest.mark.parametrize( @@ -92,7 +92,7 @@ def test_stringify_path_fspath(self): @pytest.mark.parametrize("path_type", path_types) def test_infer_compression_from_path(self, extension, expected, path_type): path = path_type("foo/bar.csv" + extension) - compression = icom._infer_compression(path, compression="infer") + compression = icom.infer_compression(path, compression="infer") assert compression == expected def test_get_filepath_or_buffer_with_path(self): @@ -313,18 +313,18 @@ def test_constructor_bad_file(self, mmap_file): err = mmap.error with pytest.raises(err, match=msg): - icom.MMapWrapper(non_file) + icom._MMapWrapper(non_file) target = open(mmap_file, "r") target.close() msg = "I/O operation on closed file" with pytest.raises(ValueError, match=msg): - icom.MMapWrapper(target) + icom._MMapWrapper(target) def test_get_attr(self, mmap_file): with open(mmap_file, "r") as target: - wrapper = icom.MMapWrapper(target) + wrapper = icom._MMapWrapper(target) attrs = dir(wrapper.mmap) attrs = [attr for attr in attrs if not attr.startswith("__")] @@ -337,7 +337,7 @@ def test_get_attr(self, mmap_file): def test_next(self, mmap_file): with open(mmap_file, "r") as target: - wrapper = icom.MMapWrapper(target) + wrapper = icom._MMapWrapper(target) lines = target.readlines() for line in lines: diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 54eb2d78fb64f..e17a32cbc8b68 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -44,14 +44,14 @@ def test_compression_size(obj, method, compression_only): @pytest.mark.parametrize("method", ["to_csv", "to_json"]) def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, "w", compression=compression_only) + f, handles = icom.get_handle(path, "w", compression=compression_only) with f: getattr(obj, method)(f) assert not f.closed assert f.closed compressed_size = os.path.getsize(path) with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, "w", compression=None) + f, 
handles = icom.get_handle(path, "w", compression=None) with f: getattr(obj, method)(f) assert not f.closed @@ -108,7 +108,7 @@ def test_compression_warning(compression_only): columns=["X", "Y", "Z"], ) with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, "w", compression=compression_only) + f, handles = icom.get_handle(path, "w", compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: df.to_csv(f, compression=compression_only) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 9041d582b19ca..b48c79000c98d 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -9,7 +9,7 @@ from pandas import DataFrame, Series import pandas.util.testing as tm -from pandas.io.common import _get_handle +from pandas.io.common import get_handle class TestSeriesToCSV: @@ -143,7 +143,7 @@ def test_to_csv_compression(self, s, encoding, compression): tm.assert_series_equal(s, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = _get_handle( + f, _handles = get_handle( filename, "w", compression=compression, encoding=encoding ) with f: From 0cd388fdc30f526f2f9729ee09c7d16513a1442d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 10:46:52 -0800 Subject: [PATCH 25/37] CLN: remove py2-legacy UnicodeReader, UnicodeWriter (#30371) * CLN: remove py2-legacy UnicodeReader, UnicodeWriter * remove unnecessar y writer_kwargs --- pandas/io/common.py | 16 ---------------- pandas/io/formats/csvs.py | 11 ++++------- pandas/io/parsers.py | 23 ++++++----------------- 3 files changed, 10 insertions(+), 40 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 7151a34cd37de..c62de76286610 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,7 +2,6 @@ import bz2 import codecs -import csv import gzip from io import BufferedIOBase, BytesIO import mmap @@ -17,9 +16,7 @@ List, Mapping, Optional, - TextIO, Tuple, - Type, Union, ) from urllib.parse import ( # noqa @@ -574,16 +571,3 @@ def next(self) -> bytes: def close(self): self.reader.close() - - -# Keeping these class for now because it provides a necessary convenience -# for "dropping" the "encoding" argument from our I/O arguments when -# creating a Unicode I/O object. 
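The wrappers can go because, on Python 3, the csv module already works in terms of str and the underlying text stream owns the encoding. A minimal illustration of the plain csv.reader/csv.writer replacement, using an in-memory buffer:

import csv
from io import StringIO

buf = StringIO()
writer = csv.writer(buf, quoting=csv.QUOTE_MINIMAL)
writer.writerow(["naïve", "café", 3])     # non-ASCII text needs no special wrapper

reader = csv.reader(StringIO(buf.getvalue()))
assert next(reader) == ["naïve", "café", "3"]   # csv always yields strings back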
-def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): - return csv.reader(f, dialect=dialect, **kwds) - - -def UnicodeWriter( - f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds -): - return csv.writer(f, dialect=dialect, **kwds) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index c0071028a8ef4..3a91d65ab4562 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -5,7 +5,7 @@ import csv as csvlib from io import StringIO import os -from typing import Any, Dict, List +from typing import List import warnings from zipfile import ZipFile @@ -22,7 +22,6 @@ from pandas.core.dtypes.missing import notna from pandas.io.common import ( - UnicodeWriter, get_compression_method, get_filepath_or_buffer, get_handle, @@ -188,7 +187,9 @@ def save(self): close = True try: - writer_kwargs: Dict[str, Any] = dict( + # Note: self.encoding is irrelevant here + self.writer = csvlib.writer( + f, lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -196,10 +197,6 @@ def save(self): escapechar=self.escapechar, quotechar=self.quotechar, ) - if self.encoding == "ascii": - self.writer = csvlib.writer(f, **writer_kwargs) - else: - self.writer = UnicodeWriter(f, encoding=self.encoding, **writer_kwargs) self._save() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index cc3d2bd12ca35..37cd36a2be3bc 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -63,7 +63,6 @@ from pandas.io.common import ( BaseIterator, - UnicodeReader, UTF8Recoder, get_filepath_or_buffer, get_handle, @@ -2431,23 +2430,13 @@ class MyDialect(csv.Dialect): self.line_pos += 1 sniffed = csv.Sniffer().sniff(line) dia.delimiter = sniffed.delimiter - if self.encoding is not None: - self.buf.extend( - list( - UnicodeReader( - StringIO(line), dialect=dia, encoding=self.encoding - ) - ) - ) - else: - self.buf.extend(list(csv.reader(StringIO(line), dialect=dia))) - if self.encoding is not None: - reader = UnicodeReader( - f, dialect=dia, encoding=self.encoding, strict=True - ) - else: - reader = csv.reader(f, dialect=dia, strict=True) + # Note: self.encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: self.encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) else: From 837606765494f1e48498e87a536e77ba3635f45b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 18:16:29 -0800 Subject: [PATCH 26/37] CI: troubleshoot codecov (#30380) --- ci/run_tests.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 4c5dbabc81950..0b68164e5767e 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -37,8 +37,7 @@ echo $PYTEST_CMD sh -c "$PYTEST_CMD" if [[ "$COVERAGE" && $? 
== 0 && "$TRAVIS_BRANCH" == "master" ]]; then - SHA=`git rev-parse HEAD` echo "uploading coverage" - echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME -C $SHA" - bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME -C `git rev-parse HEAD` + echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME" + bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME fi From c869255819b0767cd63a62ce6cc943c2388c5133 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 21 Dec 2019 19:55:52 +0000 Subject: [PATCH 27/37] CLN: move code out of try clause in merge.py (#30382) --- pandas/core/reshape/merge.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0fb029c8429a6..3dfd5fed34741 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -116,20 +116,20 @@ def _groupby_and_merge( # if we can groupby the rhs # then we can get vastly better perf - try: - # we will check & remove duplicates if indicated - if check_duplicates: - if on is None: - on = [] - elif not isinstance(on, (list, tuple)): - on = [on] - - if right.duplicated(by + on).any(): - _right = right.drop_duplicates(by + on, keep="last") - # TODO: use overload to refine return type of drop_duplicates - assert _right is not None # needed for mypy - right = _right + # we will check & remove duplicates if indicated + if check_duplicates: + if on is None: + on = [] + elif not isinstance(on, (list, tuple)): + on = [on] + + if right.duplicated(by + on).any(): + _right = right.drop_duplicates(by + on, keep="last") + # TODO: use overload to refine return type of drop_duplicates + assert _right is not None # needed for mypy + right = _right + try: rby = right.groupby(by, sort=False) except KeyError: rby = None From 477b2d5c80398bff25c8f214ff013a27e27248c8 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 21 Dec 2019 22:11:01 +0200 Subject: [PATCH 28/37] TYP: Annotations in core/indexes/ (#30390) * TYP * Update multi.py --- pandas/core/indexers.py | 5 +- pandas/core/indexes/accessors.py | 13 ++-- pandas/core/indexes/api.py | 2 - pandas/core/indexes/base.py | 10 ++- pandas/core/indexes/category.py | 4 +- pandas/core/indexes/datetimelike.py | 11 +-- pandas/core/indexes/datetimes.py | 55 ++++++++++----- pandas/core/indexes/frozen.py | 8 ++- pandas/core/indexes/interval.py | 102 +++++++++++++++++----------- pandas/core/indexes/multi.py | 41 +++++------ pandas/core/indexes/numeric.py | 30 ++++---- pandas/io/formats/console.py | 17 +++-- 12 files changed, 175 insertions(+), 123 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 209f889e809c3..ac1b0ab766a03 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -144,8 +144,9 @@ def validate_indices(indices: np.ndarray, n: int) -> None: if len(indices): min_idx = indices.min() if min_idx < -1: - msg = f"'indices' contains values less than allowed ({min_idx} < -1)" - raise ValueError(msg) + raise ValueError( + f"'indices' contains values less than allowed ({min_idx} < -1)" + ) max_idx = indices.max() if max_idx >= n: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index ae27aad3dda08..db774a03c02f8 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -26,8 +26,7 @@ class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): def 
__init__(self, data, orig): if not isinstance(data, ABCSeries): raise TypeError( - f"cannot convert an object of type {type(data)} to a " - "datetimelike index" + f"cannot convert an object of type {type(data)} to a datetimelike index" ) self._parent = data @@ -91,9 +90,8 @@ def _delegate_property_get(self, name): def _delegate_property_set(self, name, value, *args, **kwargs): raise ValueError( - "modifications to a property of a datetimelike " - "object are not supported. Change values on the " - "original." + "modifications to a property of a datetimelike object are not supported. " + "Change values on the original." ) def _delegate_method(self, name, *args, **kwargs): @@ -222,7 +220,7 @@ def to_pytimedelta(self): Returns ------- - a : numpy.ndarray + numpy.ndarray Array of 1D containing data with `datetime.timedelta` type. See Also @@ -314,8 +312,7 @@ def __new__(cls, data): if not isinstance(data, ABCSeries): raise TypeError( - f"cannot convert an object of type {type(data)} to a " - "datetimelike index" + f"cannot convert an object of type {type(data)} to a datetimelike index" ) orig = data if is_categorical_dtype(data) else None diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e99ae96f35315..1904456848396 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -124,7 +124,6 @@ def _get_combined_index( ------- Index """ - # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: @@ -273,7 +272,6 @@ def get_consensus_names(indexes): list A list representing the consensus 'names' found. """ - # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = {tuple(i.names) for i in indexes if com.any_not_none(*i.names)} diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5abd049b9564c..ce7a238daeca9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -804,11 +804,10 @@ def _assert_take_fillable( # only fill if we are passing a non-None fill_value if allow_fill and fill_value is not None: if (indices < -1).any(): - msg = ( + raise ValueError( "When allow_fill=True and fill_value is not None, " "all indices must be >= -1" ) - raise ValueError(msg) taken = algos.take( values, indices, allow_fill=allow_fill, fill_value=na_value ) @@ -1324,8 +1323,7 @@ def set_names(self, names, level=None, inplace=False): raise ValueError("Level must be None for non-MultiIndex") if level is not None and not is_list_like(level) and is_list_like(names): - msg = "Names must be a string when a single level is provided." 
- raise TypeError(msg) + raise TypeError("Names must be a string when a single level is provided.") if not is_list_like(names) and level is None and self.nlevels > 1: raise TypeError("Must pass list-like as `names`.") @@ -1421,8 +1419,8 @@ def _validate_index_level(self, level): if isinstance(level, int): if level < 0 and level != -1: raise IndexError( - f"Too many levels: Index has only 1 level," - f" {level} is not a valid level number" + "Too many levels: Index has only 1 level, " + f"{level} is not a valid level number" ) elif level > 0: raise IndexError( diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 44478d00da9cf..d35117b8db86e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -715,9 +715,7 @@ def _convert_list_indexer(self, keyarr, kind=None): indexer = self.categories.get_indexer(np.asarray(keyarr)) if (indexer == -1).any(): raise KeyError( - "a list-indexer must only " - "include values that are " - "in the categories" + "a list-indexer must only include values that are in the categories" ) return self.get_indexer(keyarr) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 50dbddec5c8b2..3bf6dce00a031 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -2,7 +2,7 @@ Base and utility classes for tseries type pandas objects. """ import operator -from typing import Set +from typing import List, Set import numpy as np @@ -73,7 +73,7 @@ def method(self, other): class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ - common ops mixin to support a unified interface datetimelike Index + Common ops mixin to support a unified interface datetimelike Index. """ _data: ExtensionArray @@ -336,7 +336,7 @@ def _convert_tolerance(self, tolerance, target): raise ValueError("list-like tolerance size must match target index size") return tolerance - def tolist(self): + def tolist(self) -> List: """ Return a list of the underlying data. """ @@ -661,11 +661,12 @@ def _summary(self, name=None): Parameters ---------- name : str - name to use in the summary representation + Name to use in the summary representation. Returns ------- - String with a summarized representation of the index + str + Summarized representation of the index. """ formatter = self._formatter_func if len(self) > 0: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 523c434cb7377..1fd962dd24656 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -45,9 +45,10 @@ def _new_DatetimeIndex(cls, d): - """ This is called upon unpickling, rather than the default which doesn't - have arguments and breaks __new__ """ - + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ if "data" in d and not isinstance(d["data"], DatetimeIndex): # Avoid need to verify integrity by calling simple_new directly data = d.pop("data") @@ -100,9 +101,9 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): Parameters ---------- - data : array-like (1-dimensional), optional + data : array-like (1-dimensional), optional Optional datetime-like data to construct index with. - copy : bool + copy : bool Make a copy of input ndarray. freq : str or pandas offset object, optional One of pandas date offset strings or corresponding objects. 
The string @@ -273,7 +274,7 @@ def __new__( @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): """ - we require the we have a dtype compat for the values + We require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ if isinstance(values, DatetimeArray): @@ -345,7 +346,13 @@ def tz(self, value): @cache_readonly def _is_dates_only(self) -> bool: - """Return a boolean if we are only dates (and don't have a timezone)""" + """ + Return a boolean if we are only dates (and don't have a timezone) + + Returns + ------- + bool + """ from pandas.io.formats.format import _is_dates_only return _is_dates_only(self.values) and self.tz is None @@ -360,7 +367,9 @@ def __reduce__(self): return _new_DatetimeIndex, (type(self), d), None def __setstate__(self, state): - """Necessary for making this object picklable""" + """ + Necessary for making this object picklable. + """ if isinstance(state, dict): super().__setstate__(state) @@ -393,7 +402,9 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ + """ + Convert value to be insertable to ndarray. + """ if self._has_same_tz(value): return _to_M8(value) raise ValueError("Passed item and index have different timezone") @@ -461,7 +472,7 @@ def _union(self, other, sort): def union_many(self, others): """ - A bit of a hack to accelerate unioning a collection of indexes + A bit of a hack to accelerate unioning a collection of indexes. """ this = self @@ -489,7 +500,7 @@ def union_many(self, others): this._data._dtype = dtype return this - def _can_fast_union(self, other): + def _can_fast_union(self, other) -> bool: if not isinstance(other, DatetimeIndex): return False @@ -581,7 +592,7 @@ def intersection(self, other, sort=False): Returns ------- - y : Index or DatetimeIndex or TimedeltaIndex + Index or DatetimeIndex or TimedeltaIndex """ return super().intersection(other, sort=sort) @@ -699,7 +710,9 @@ def snap(self, freq="S"): # we know it conforms; skip check return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq) - def join(self, other, how="left", level=None, return_indexers=False, sort=False): + def join( + self, other, how: str = "left", level=None, return_indexers=False, sort=False + ): """ See Index.join """ @@ -840,9 +853,8 @@ def _parsed_string_to_bounds(self, reso, parsed): if parsed.tzinfo is not None: if self.tz is None: raise ValueError( - "The index must be timezone aware " - "when indexing with a date string with a " - "UTC offset" + "The index must be timezone aware when indexing " + "with a date string with a UTC offset" ) start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) @@ -851,7 +863,16 @@ def _parsed_string_to_bounds(self, reso, parsed): end = end.tz_localize(self.tz) return start, end - def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): + def _partial_date_slice( + self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True + ): + """ + Parameters + ---------- + reso : str + use_lhs : bool, default True + use_rhs : bool, default True + """ is_monotonic = self.is_monotonic if ( is_monotonic diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index fd8ab74ed4920..909643d50e9d7 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -35,7 +35,7 @@ def union(self, other) -> 
"FrozenList": Returns ------- - diff : FrozenList + FrozenList The collection difference between self and other. """ if isinstance(other, tuple): @@ -53,7 +53,7 @@ def difference(self, other) -> "FrozenList": Returns ------- - diff : FrozenList + FrozenList The collection difference between self and other. """ other = set(other) @@ -92,7 +92,9 @@ def __hash__(self): return hash(tuple(self)) def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" + """ + This method will not function because object is immutable. + """ raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") def __str__(self) -> str: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f046f0d89c428..dee4c959f8c90 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -143,21 +143,19 @@ def func(intvidx_self, other, sort=False): result = result.astype(intvidx_self.dtype) return result elif intvidx_self.closed != other.closed: - msg = ( + raise ValueError( "can only do set operations between two IntervalIndex " "objects that are closed on the same side" ) - raise ValueError(msg) # GH 19016: ensure set op will not return a prohibited dtype subtypes = [intvidx_self.dtype.subtype, other.dtype.subtype] common_subtype = find_common_type(subtypes) if is_object_dtype(common_subtype): - msg = ( + raise TypeError( f"can only do {self.op_name} between two IntervalIndex " "objects that have compatible dtypes" ) - raise TypeError(msg) return setop(intvidx_self, other, sort) @@ -210,7 +208,13 @@ class IntervalIndex(IntervalMixin, Index): # Constructors def __new__( - cls, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True + cls, + data, + closed=None, + dtype=None, + copy: bool = False, + name=None, + verify_integrity: bool = True, ): if name is None and hasattr(data, "name"): @@ -263,7 +267,9 @@ def _simple_new(cls, array, name, closed=None): ), ) ) - def from_breaks(cls, breaks, closed="right", name=None, copy=False, dtype=None): + def from_breaks( + cls, breaks, closed: str = "right", name=None, copy: bool = False, dtype=None + ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_breaks( breaks, closed=closed, copy=copy, dtype=dtype @@ -288,7 +294,13 @@ def from_breaks(cls, breaks, closed="right", name=None, copy=False, dtype=None): ) ) def from_arrays( - cls, left, right, closed="right", name=None, copy=False, dtype=None + cls, + left, + right, + closed: str = "right", + name=None, + copy: bool = False, + dtype=None, ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_arrays( @@ -313,7 +325,9 @@ def from_arrays( ), ) ) - def from_tuples(cls, data, closed="right", name=None, copy=False, dtype=None): + def from_tuples( + cls, data, closed: str = "right", name=None, copy: bool = False, dtype=None + ): with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) return cls._simple_new(arr, name=name) @@ -329,7 +343,9 @@ def _shallow_copy(self, left=None, right=None, **kwargs): @cache_readonly def _isnan(self): - """Return a mask indicating if each value is NA""" + """ + Return a mask indicating if each value is NA. 
+ """ if self._mask is None: self._mask = isna(self.left) return self._mask @@ -351,7 +367,7 @@ def __contains__(self, key) -> bool: Returns ------- - boolean + bool """ if not isinstance(key, Interval): return False @@ -470,7 +486,9 @@ def _ndarray_values(self) -> np.ndarray: return np.array(self._data) def __array__(self, result=None): - """ the array interface, return my values """ + """ + The array interface, return my values. + """ return self._ndarray_values def __array_wrap__(self, result, context=None): @@ -503,7 +521,9 @@ def astype(self, dtype, copy=True): @cache_readonly def dtype(self): - """Return the dtype object of the underlying data""" + """ + Return the dtype object of the underlying data. + """ return self._data.dtype @property @@ -551,7 +571,7 @@ def is_monotonic_decreasing(self) -> bool: @cache_readonly def is_unique(self): """ - Return True if the IntervalIndex contains unique elements, else False + Return True if the IntervalIndex contains unique elements, else False. """ left = self.left right = self.right @@ -708,7 +728,7 @@ def _needs_i8_conversion(self, key): Returns ------- - boolean + bool """ if is_interval_dtype(key) or isinstance(key, Interval): return self._needs_i8_conversion(key.left) @@ -729,7 +749,7 @@ def _maybe_convert_i8(self, key): Returns ------- - key: scalar or list-like + scalar or list-like The original key if no conversion occurred, int if converted scalar, Int64Index if converted list-like. """ @@ -775,22 +795,21 @@ def _check_method(self, method): return if method in ["bfill", "backfill", "pad", "ffill", "nearest"]: - msg = f"method {method} not yet implemented for IntervalIndex" - raise NotImplementedError(msg) + raise NotImplementedError( + f"method {method} not yet implemented for IntervalIndex" + ) raise ValueError("Invalid fill method") def _searchsorted_monotonic(self, label, side, exclude_label=False): if not self.is_non_overlapping_monotonic: raise KeyError( - "can only get slices from an IntervalIndex if " - "bounds are non-overlapping and all monotonic " - "increasing or decreasing" + "can only get slices from an IntervalIndex if bounds are " + "non-overlapping and all monotonic increasing or decreasing" ) if isinstance(label, IntervalMixin): - msg = "Interval objects are not currently supported" - raise NotImplementedError(msg) + raise NotImplementedError("Interval objects are not currently supported") # GH 20921: "not is_monotonic_increasing" for the second condition # instead of "is_monotonic_decreasing" to account for single element @@ -850,7 +869,7 @@ def get_loc( Returns ------- - loc : int if unique index, slice if monotonic index, else mask + int if unique index, slice if monotonic index, else mask Examples -------- @@ -933,11 +952,10 @@ def get_indexer( self._check_method(method) if self.is_overlapping: - msg = ( - "cannot handle overlapping indices; use " - "IntervalIndex.get_indexer_non_unique" + raise InvalidIndexError( + "cannot handle overlapping indices; " + "use IntervalIndex.get_indexer_non_unique" ) - raise InvalidIndexError(msg) target_as_index = ensure_index(target) @@ -1071,7 +1089,7 @@ def delete(self, loc): Returns ------- - new_index : IntervalIndex + IntervalIndex """ new_left = self.left.delete(loc) new_right = self.right.delete(loc) @@ -1090,7 +1108,7 @@ def insert(self, loc, item): Returns ------- - new_index : IntervalIndex + IntervalIndex """ if isinstance(item, Interval): if item.closed != self.closed: @@ -1117,11 +1135,10 @@ def _concat_same_dtype(self, to_concat, name): we allow a 0-len index here 
as well """ if not len({i.closed for i in to_concat if len(i)}) == 1: - msg = ( + raise ValueError( "can only append two IntervalIndex objects " "that are closed on the same side" ) - raise ValueError(msg) return super()._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) @@ -1175,10 +1192,13 @@ def _format_data(self, name=None): n = min(max_seq_items // 2, 10) head = [formatter(x) for x in self[:n]] tail = [formatter(x) for x in self[-n:]] - summary = f"[{', '.join(head)} ... {', '.join(tail)}]" + head_joined = ", ".join(head) + tail_joined = ", ".join(tail) + summary = f"[{head_joined} ... {tail_joined}]" else: tail = [formatter(x) for x in self] - summary = f"[{', '.join(tail)}]" + joined = ", ".join(tail) + summary = f"[{joined}]" return summary + "," + self._format_space() @@ -1189,7 +1209,7 @@ def _format_attrs(self): attrs.append(("dtype", f"'{self.dtype}'")) return attrs - def _format_space(self): + def _format_space(self) -> str: space = " " * (len(type(self).__name__) + 1) return f"\n{space}" @@ -1200,7 +1220,7 @@ def argsort(self, *args, **kwargs): def equals(self, other) -> bool: """ - Determines if two IntervalIndex objects contain the same elements + Determines if two IntervalIndex objects contain the same elements. """ if self.is_(other): return True @@ -1288,7 +1308,7 @@ def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex": Returns ------- - taken : IntervalIndex + IntervalIndex """ lindexer = self.left.get_indexer(other.left) rindexer = self.right.get_indexer(other.right) @@ -1310,7 +1330,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": Returns ------- - taken : IntervalIndex + IntervalIndex """ mask = np.zeros(len(self), dtype=bool) @@ -1360,7 +1380,9 @@ def is_all_dates(self) -> bool: def _is_valid_endpoint(endpoint) -> bool: - """helper for interval_range to check if start/end are valid types""" + """ + Helper for interval_range to check if start/end are valid types. + """ return any( [ is_number(endpoint), @@ -1372,7 +1394,9 @@ def _is_valid_endpoint(endpoint) -> bool: def _is_type_compatible(a, b) -> bool: - """helper for interval_range to check type compat of start/end/freq""" + """ + Helper for interval_range to check type compat of start/end/freq. + """ is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) return ( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9e434d0f5f704..05a4da28eb0a1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1,6 +1,7 @@ from collections import OrderedDict import datetime from sys import getsizeof +from typing import List, Optional import warnings import numpy as np @@ -85,7 +86,7 @@ def _codes_to_ints(self, codes): Returns ------- - int_keys : scalar or 1-dimensional array, of dtype uint64 + scalar or 1-dimensional array, of dtype uint64 Integer(s) representing one combination (each). """ # Shift the representation of each level by the pre-calculated number @@ -125,7 +126,7 @@ def _codes_to_ints(self, codes): Returns ------- - int_keys : int, or 1-dimensional array of dtype object + int, or 1-dimensional array of dtype object Integer(s) representing one combination (each). 
""" @@ -248,8 +249,8 @@ def __new__( dtype=None, copy=False, name=None, - verify_integrity=True, - _set_identity=True, + verify_integrity: bool = True, + _set_identity: bool = True, ): # compat with Index @@ -287,7 +288,7 @@ def __new__( return result - def _validate_codes(self, level: list, code: list): + def _validate_codes(self, level: List, code: List): """ Reassign code values as -1 if their corresponding levels are NaN. @@ -300,7 +301,7 @@ def _validate_codes(self, level: list, code: list): Returns ------- - code : new code where code value = -1 if it corresponds + new code where code value = -1 if it corresponds to a level with missing values (NaN, NaT, None). """ null_mask = isna(level) @@ -308,9 +309,10 @@ def _validate_codes(self, level: list, code: list): code = np.where(null_mask[code], -1, code) return code - def _verify_integrity(self, codes=None, levels=None): + def _verify_integrity( + self, codes: Optional[List] = None, levels: Optional[List] = None + ): """ - Parameters ---------- codes : optional list @@ -326,7 +328,7 @@ def _verify_integrity(self, codes=None, levels=None): Returns ------- - codes : new codes where code value = -1 if it corresponds to a + new codes where code value = -1 if it corresponds to a NaN level. """ # NOTE: Currently does not check, among other things, that cached @@ -336,8 +338,8 @@ def _verify_integrity(self, codes=None, levels=None): if len(levels) != len(codes): raise ValueError( - "Length of levels and codes must match. NOTE:" - " this index is in an inconsistent state." + "Length of levels and codes must match. NOTE: " + "this index is in an inconsistent state." ) codes_length = len(codes[0]) for i, (level, level_codes) in enumerate(zip(levels, codes)): @@ -389,7 +391,7 @@ def from_arrays(cls, arrays, sortorder=None, names=_no_default_names): Returns ------- - index : MultiIndex + MultiIndex See Also -------- @@ -454,7 +456,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): Returns ------- - index : MultiIndex + MultiIndex See Also -------- @@ -481,8 +483,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): if len(tuples) == 0: if names is None: - msg = "Cannot infer number of levels from empty list" - raise TypeError(msg) + raise TypeError("Cannot infer number of levels from empty list") arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): @@ -518,7 +519,7 @@ def from_product(cls, iterables, sortorder=None, names=_no_default_names): Returns ------- - index : MultiIndex + MultiIndex See Also -------- @@ -653,15 +654,15 @@ def array(self): ------ ValueError """ - msg = ( + raise ValueError( "MultiIndex has no single backing array. Use " "'MultiIndex.to_numpy()' to get a NumPy array of tuples." ) - raise ValueError(msg) @property def _is_homogeneous_type(self) -> bool: - """Whether the levels of a MultiIndex all have the same dtype. + """ + Whether the levels of a MultiIndex all have the same dtype. This looks at the dtypes of the levels. @@ -732,7 +733,7 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): Level(s) to set (None for all levels). inplace : bool If True, mutates in place. - verify_integrity : bool (default True) + verify_integrity : bool, default True If True, checks that levels and codes are compatible. 
Returns diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 048bff46759bc..b84c69b8caf51 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -85,8 +85,9 @@ def _validate_dtype(cls, dtype: Dtype) -> None: validation_func, expected = validation_metadata[cls._typ] if not validation_func(dtype): - msg = f"Incorrect `dtype` passed: expected {expected}, received {dtype}" - raise ValueError(msg) + raise ValueError( + f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + ) @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): @@ -106,7 +107,6 @@ def _convert_for_op(self, value): """ Convert value to be insertable to ndarray. """ - if is_bool(value) or is_bool_dtype(value): # force conversion to object # so we don't lose the bools @@ -121,17 +121,13 @@ def _convert_tolerance(self, tolerance, target): if not np.issubdtype(tolerance.dtype, np.number): if tolerance.ndim > 0: raise ValueError( - ( - f"tolerance argument for {type(self).__name__} must contain " - "numeric elements if it is list type" - ) + f"tolerance argument for {type(self).__name__} must contain " + "numeric elements if it is list type" ) else: raise ValueError( - ( - f"tolerance argument for {type(self).__name__} must be numeric " - f"if it is a scalar: {repr(tolerance)}" - ) + f"tolerance argument for {type(self).__name__} must be numeric " + f"if it is a scalar: {repr(tolerance)}" ) return tolerance @@ -244,7 +240,9 @@ class Int64Index(IntegerIndex): @property def inferred_type(self) -> str: - """Always 'integer' for ``Int64Index``""" + """ + Always 'integer' for ``Int64Index`` + """ return "integer" @property @@ -299,7 +297,9 @@ class UInt64Index(IntegerIndex): @property def inferred_type(self) -> str: - """Always 'integer' for ``UInt64Index``""" + """ + Always 'integer' for ``UInt64Index`` + """ return "integer" @property @@ -374,7 +374,9 @@ class Float64Index(NumericIndex): @property def inferred_type(self) -> str: - """Always 'floating' for ``Float64Index``""" + """ + Always 'floating' for ``Float64Index`` + """ return "floating" @Appender(_index_shared_docs["astype"]) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 1d4fa929b2138..bed29e1fd4792 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -6,7 +6,8 @@ def get_console_size(): - """Return console size as tuple = (width, height). + """ + Return console size as tuple = (width, height). Returns (None,None) in non-interactive session. """ @@ -50,9 +51,13 @@ def get_console_size(): def in_interactive_session(): - """ check if we're running in an interactive shell + """ + Check if we're running in an interactive shell. - returns True if running under python/ipython interactive shell + Returns + ------- + bool + True if running under python/ipython interactive shell. """ from pandas import get_option @@ -71,7 +76,11 @@ def check_main(): def in_ipython_frontend(): """ - check if we're inside an an IPython zmq frontend + Check if we're inside an an IPython zmq frontend. 
+ + Returns + ------- + bool + """ try: ip = get_ipython() # noqa From 835f207c9283f5c85c4684c06d637123c4c84d5b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 22 Dec 2019 10:56:26 +0100 Subject: [PATCH 29/37] DOC: fix external links + favicon (#30389) --- doc/source/_static/favicon.ico | Bin 3902 -> 0 bytes doc/source/conf.py | 8 ++++++-- 2 files changed, 6 insertions(+), 2 deletions(-) delete mode 100644 doc/source/_static/favicon.ico diff --git a/doc/source/_static/favicon.ico b/doc/source/_static/favicon.ico deleted file mode 100644 index d15c4803b62e6dd2f706a5ebe1861fe438f5d98f..0000000000000000000000000000000000000000 GIT binary patch [base85-encoded favicon image data omitted] diff --git a/doc/source/conf.py b/doc/source/conf.py index 096f1a63eddf8..481c03ab8f388 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -204,7 +204,11 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# html_theme_options = {} +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/pandas-dev/pandas", + "twitter_url": "https://twitter.com/pandas_dev", +} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = ["themes"] @@ -228,7 +232,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = os.path.join(html_static_path[0], "favicon.ico") +html_favicon = "../../web/pandas/static/img/favicon.ico" # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format.
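For readers skimming the documentation patch above: ``doc/source/conf.py`` is ordinary Python, so after this change the theme configuration reads roughly as follows. This is only a paraphrase of the hunks above, not additional configuration.

    # doc/source/conf.py after PATCH 29/37 (paraphrased from the diff above)
    html_theme_options = {
        "external_links": [],
        "github_url": "https://github.com/pandas-dev/pandas",
        "twitter_url": "https://twitter.com/pandas_dev",
    }

    # The favicon now comes from the website assets rather than doc/source/_static;
    # Sphinx resolves this path relative to the configuration directory (doc/source).
    html_favicon = "../../web/pandas/static/img/favicon.ico"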
From a2bbdb5a0abd131d0190fe58c0ba7cbf21b960c9 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sun, 22 Dec 2019 19:21:31 +0200 Subject: [PATCH 30/37] STY: Underscores for long numbers (#30397) --- pandas/_libs/tslibs/timestamps.pyx | 25 ++++++--- .../tests/scalar/timestamp/test_timestamp.py | 56 +++++++++---------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a44f374264f09..86a9d053730b8 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -336,11 +336,22 @@ class Timestamp(_Timestamp): """ return cls(datetime.combine(date, time)) - def __new__(cls, object ts_input=_no_input, - object freq=None, tz=None, unit=None, - year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - nanosecond=None, tzinfo=None): + def __new__( + cls, + object ts_input=_no_input, + object freq=None, + tz=None, + unit=None, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=None + ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. # @@ -401,8 +412,8 @@ class Timestamp(_Timestamp): freq = None if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: - raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the" - " tz parameter. Use tz_convert instead.") + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " + "the tz parameter. Use tz_convert instead.") ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 512a83ed304d1..25609cb852ed4 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -201,17 +201,17 @@ class TestTimestampConstructors: def test_constructor(self): base_str = "2014-07-01 09:00" base_dt = datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 + base_expected = 1_404_205_200_000_000_000 # confirm base representation is correct - assert calendar.timegm(base_dt.timetuple()) * 1000000000 == base_expected + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected tests = [ (base_str, base_dt, base_expected), ( "2014-07-01 10:00", datetime(2014, 7, 1, 10), - base_expected + 3600 * 1000000000, + base_expected + 3600 * 1_000_000_000, ), ( "2014-07-01 09:00:00.000008000", @@ -250,7 +250,7 @@ def test_constructor(self): # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: - expected_tz = expected - offset * 3600 * 1000000000 + expected_tz = expected - offset * 3600 * 1_000_000_000 assert result.value == expected_tz assert conversion.pydt_to_i8(result) == expected_tz @@ -264,7 +264,7 @@ def test_constructor(self): result = Timestamp(result).tz_convert("UTC") else: result = Timestamp(result, tz="UTC") - expected_utc = expected - offset * 3600 * 1000000000 + expected_utc = expected - offset * 3600 * 1_000_000_000 assert result.value == expected_utc assert conversion.pydt_to_i8(result) == expected_utc @@ -272,14 +272,14 @@ def test_constructor_with_stringoffset(self): # GH 7833 base_str = "2014-07-01 11:00:00+02:00" base_dt = datetime(2014, 7, 1, 9) - base_expected = 1404205200000000000 + base_expected 
= 1_404_205_200_000_000_000 # confirm base representation is correct - assert calendar.timegm(base_dt.timetuple()) * 1000000000 == base_expected + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected tests = [ (base_str, base_expected), - ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1000000000), + ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1_000_000_000), ("2014-07-01 11:00:00.000008000+02:00", base_expected + 8000), ("2014-07-01 11:00:00.000000005+02:00", base_expected + 5), ] @@ -725,7 +725,7 @@ def test_utc_z_designator(self): assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is utc def test_asm8(self): - np.random.seed(7960929) + np.random.seed(7_960_929) ns = [Timestamp.min.value, Timestamp.max.value, 1000] for n in ns: @@ -786,7 +786,7 @@ def compare(x, y): ) def test_basics_nanos(self): - val = np.int64(946684800000000000).view("M8[ns]") + val = np.int64(946_684_800_000_000_000).view("M8[ns]") stamp = Timestamp(val.view("i8") + 500) assert stamp.year == 2000 assert stamp.month == 1 @@ -794,7 +794,7 @@ def test_basics_nanos(self): assert stamp.nanosecond == 500 # GH 14415 - val = np.iinfo(np.int64).min + 80000000000000 + val = np.iinfo(np.int64).min + 80_000_000_000_000 stamp = Timestamp(val) assert stamp.year == 1677 assert stamp.month == 9 @@ -807,8 +807,8 @@ def test_basics_nanos(self): [ [946688461000000000, {}], [946688461000000000 / 1000, dict(unit="us")], - [946688461000000000 / 1000000, dict(unit="ms")], - [946688461000000000 / 1000000000, dict(unit="s")], + [946688461000000000 / 1_000_000, dict(unit="ms")], + [946688461000000000 / 1_000_000_000, dict(unit="s")], [10957, dict(unit="D", h=0)], [ (946688461000000000 + 500000) / 1000000000, @@ -852,24 +852,24 @@ def test_roundtrip(self): base = Timestamp("20140101 00:00:00") result = Timestamp(base.value + Timedelta("5ms").value) - assert result == Timestamp(str(base) + ".005000") + assert result == Timestamp(f"{base}.005000") assert result.microsecond == 5000 result = Timestamp(base.value + Timedelta("5us").value) - assert result == Timestamp(str(base) + ".000005") + assert result == Timestamp(f"{base}.000005") assert result.microsecond == 5 result = Timestamp(base.value + Timedelta("5ns").value) - assert result == Timestamp(str(base) + ".000000005") + assert result == Timestamp(f"{base}.000000005") assert result.nanosecond == 5 assert result.microsecond == 0 result = Timestamp(base.value + Timedelta("6ms 5us").value) - assert result == Timestamp(str(base) + ".006005") + assert result == Timestamp(f"{base}.006005") assert result.microsecond == 5 + 6 * 1000 result = Timestamp(base.value + Timedelta("200ms 5us").value) - assert result == Timestamp(str(base) + ".200005") + assert result == Timestamp(f"{base}.200005") assert result.microsecond == 5 + 200 * 1000 def test_hash_equivalent(self): @@ -890,12 +890,12 @@ def test_nanosecond_string_parsing(self): ts = Timestamp("2013-05-01 07:15:45.123456789") # GH 7878 expected_repr = "2013-05-01 07:15:45.123456789" - expected_value = 1367392545123456789 + expected_value = 1_367_392_545_123_456_789 assert ts.value == expected_value assert expected_repr in repr(ts) ts = Timestamp("2013-05-01 07:15:45.123456789+09:00", tz="Asia/Tokyo") - assert ts.value == expected_value - 9 * 3600 * 1000000000 + assert ts.value == expected_value - 9 * 3600 * 1_000_000_000 assert expected_repr in repr(ts) ts = Timestamp("2013-05-01 07:15:45.123456789", tz="UTC") @@ -903,7 +903,7 @@ def test_nanosecond_string_parsing(self): assert expected_repr in repr(ts) 
ts = Timestamp("2013-05-01 07:15:45.123456789", tz="US/Eastern") - assert ts.value == expected_value + 4 * 3600 * 1000000000 + assert ts.value == expected_value + 4 * 3600 * 1_000_000_000 assert expected_repr in repr(ts) # GH 10041 @@ -913,7 +913,7 @@ def test_nanosecond_string_parsing(self): def test_nanosecond_timestamp(self): # GH 7610 - expected = 1293840000000000005 + expected = 1_293_840_000_000_000_005 t = Timestamp("2011-01-01") + offsets.Nano(5) assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" assert t.value == expected @@ -929,7 +929,7 @@ def test_nanosecond_timestamp(self): assert t.value == expected assert t.nanosecond == 5 - expected = 1293840000000000010 + expected = 1_293_840_000_000_000_010 t = t + offsets.Nano(5) assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" assert t.value == expected @@ -949,23 +949,23 @@ def test_nanosecond_timestamp(self): class TestTimestampToJulianDate: def test_compare_1700(self): r = Timestamp("1700-06-23").to_julian_date() - assert r == 2342145.5 + assert r == 2_342_145.5 def test_compare_2000(self): r = Timestamp("2000-04-12").to_julian_date() - assert r == 2451646.5 + assert r == 2_451_646.5 def test_compare_2100(self): r = Timestamp("2100-08-12").to_julian_date() - assert r == 2488292.5 + assert r == 2_488_292.5 def test_compare_hour01(self): r = Timestamp("2000-08-12T01:00:00").to_julian_date() - assert r == 2451768.5416666666666666 + assert r == 2_451_768.5416666666666666 def test_compare_hour13(self): r = Timestamp("2000-08-12T13:00:00").to_julian_date() - assert r == 2451769.0416666666666666 + assert r == 2_451_769.0416666666666666 class TestTimestampConversion: From 104fc1175b4bf622fe0e96f76be2a78541204c49 Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Sun, 22 Dec 2019 23:49:24 +0100 Subject: [PATCH 31/37] fix call of tm.assert_frame_equal --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cce9a52e5077d..2606f0aecc01c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1606,9 +1606,9 @@ def test_json_negative_indent_raises(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") def test_deprecate_numpy_argument_read_json(self): # https://github.com/pandas-dev/pandas/issues/28512 - df = DataFrame([1, 2, 3]) + expected = DataFrame([1, 2, 3]) with tm.assert_produces_warning(None): with catch_warnings(): filterwarnings("ignore", category=FutureWarning) result = read_json(df.to_json(), numpy=True) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) From 97b182b7522c26153d0ad4239afa2a29d23b7248 Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Wed, 18 Dec 2019 23:06:34 +0100 Subject: [PATCH 32/37] add original changes. --- doc/source/whatsnew/v1.0.0.rst | 3 ++- pandas/io/json/_json.py | 4 ++++ pandas/tests/io/json/test_pandas.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a15d5b319fc82..1272b0de27978 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -503,7 +503,8 @@ Deprecations Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). 
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - +- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). +- .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 14a272e15bc29..93b28f8a0e285 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -27,6 +27,7 @@ ) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer +from pandas.util._decorators import deprecate_kwarg from ._normalize import convert_to_line_delimits from ._table_schema import build_table_schema, parse_table_schema @@ -353,6 +354,7 @@ def _write( return serialized +@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) def read_json( path_or_buf=None, orient=None, @@ -466,6 +468,8 @@ def read_json( non-numeric column and index labels are supported. Note also that the JSON ordering MUST be the same for each term if numpy=True. + .. deprecated:: 1.0.0 + precise_float : bool, default False Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (False) is to use fast but diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index bce3d1de849aa..cce9a52e5077d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,6 +1,7 @@ from collections import OrderedDict from datetime import timedelta from io import StringIO +from warnings import catch_warnings, filterwarnings import json import os @@ -1601,3 +1602,13 @@ def test_json_indent_all_orients(self, orient, expected): def test_json_negative_indent_raises(self): with pytest.raises(ValueError, match="must be a nonnegative integer"): pd.DataFrame().to_json(indent=-1) + + @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") + def test_deprecate_numpy_argument_read_json(self): + # https://github.com/pandas-dev/pandas/issues/28512 + df = DataFrame([1, 2, 3]) + with tm.assert_produces_warning(None): + with catch_warnings(): + filterwarnings("ignore", category=FutureWarning) + result = read_json(df.to_json(), numpy=True) + assert_frame_equal(result, df) From 3c8f95b7515901de1f68433418e5690d4f7d5422 Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Sun, 22 Dec 2019 23:49:24 +0100 Subject: [PATCH 33/37] fix call of tm.assert_frame_equal --- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cce9a52e5077d..2606f0aecc01c 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1606,9 +1606,9 @@ def test_json_negative_indent_raises(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") def test_deprecate_numpy_argument_read_json(self): # https://github.com/pandas-dev/pandas/issues/28512 - df = DataFrame([1, 2, 3]) + expected = DataFrame([1, 2, 3]) with tm.assert_produces_warning(None): with catch_warnings(): filterwarnings("ignore", category=FutureWarning) result = read_json(df.to_json(), numpy=True) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) From f46426e1ef7ea9a390b241fb3f30ccb32e301bca Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Mon, 23 Dec 2019 00:03:46 +0100 Subject: [PATCH 34/37] Revert "fix call of tm.assert_frame_equal" This reverts commit 104fc1175b4bf622fe0e96f76be2a78541204c49. 
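Background for the ``read_json`` deprecation test that the surrounding patches add, adjust, and revert: with ``@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)`` applied to ``read_json``, passing ``numpy=True`` is expected to emit a ``FutureWarning`` while still returning the round-tripped frame. A minimal stand-alone sketch of that behaviour (an illustration assuming a pandas build with these patches applied, not part of the patch itself):

    import warnings

    import pandas as pd

    df = pd.DataFrame([1, 2, 3])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # the "numpy" keyword is deprecated by the decorator added above
        result = pd.read_json(df.to_json(), numpy=True)

    # a FutureWarning about the deprecated keyword should have been recorded,
    # and the round-tripped frame should still match the original
    assert any(issubclass(w.category, FutureWarning) for w in caught)
    pd.testing.assert_frame_equal(result, df)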
--- pandas/tests/io/json/test_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2606f0aecc01c..cce9a52e5077d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1606,9 +1606,9 @@ def test_json_negative_indent_raises(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") def test_deprecate_numpy_argument_read_json(self): # https://github.com/pandas-dev/pandas/issues/28512 - expected = DataFrame([1, 2, 3]) + df = DataFrame([1, 2, 3]) with tm.assert_produces_warning(None): with catch_warnings(): filterwarnings("ignore", category=FutureWarning) result = read_json(df.to_json(), numpy=True) - tm.assert_frame_equal(result, expected) + assert_frame_equal(result, df) From 0be5dd79c653d77a38f9f52bfec0bb7aca132d85 Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Mon, 23 Dec 2019 00:05:00 +0100 Subject: [PATCH 35/37] Revert "merge." This reverts commit df2671b7fd179b91c806f824925f7d48ff62ade9, reversing changes made to 104fc1175b4bf622fe0e96f76be2a78541204c49. --- ci/run_tests.sh | 4 +- doc/source/_static/favicon.ico | Bin 0 -> 3902 bytes doc/source/conf.py | 8 +- doc/source/user_guide/io.rst | 5 +- doc/source/user_guide/text.rst | 9 +- doc/source/whatsnew/v1.0.0.rst | 16 +- pandas/_config/config.py | 4 +- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/index.pyx | 2 +- pandas/_libs/lib.pyx | 2 +- pandas/_libs/parsers.pyx | 21 +- pandas/_libs/src/klib/khash.h | 2 +- pandas/_libs/src/ujson/lib/ultrajsondec.c | 6 +- pandas/_libs/src/ujson/python/objToJSON.c | 2 +- pandas/_libs/tslibs/timestamps.pyx | 27 +- pandas/core/arrays/datetimes.py | 7 +- pandas/core/arrays/sparse/dtype.py | 2 +- pandas/core/arrays/string_.py | 4 +- pandas/core/dtypes/cast.py | 5 +- pandas/core/dtypes/common.py | 10 +- pandas/core/dtypes/dtypes.py | 9 +- pandas/core/frame.py | 51 ++-- pandas/core/generic.py | 79 +++--- pandas/core/groupby/base.py | 2 +- pandas/core/indexers.py | 5 +- pandas/core/indexes/accessors.py | 13 +- pandas/core/indexes/api.py | 2 + pandas/core/indexes/base.py | 10 +- pandas/core/indexes/category.py | 4 +- pandas/core/indexes/datetimelike.py | 11 +- pandas/core/indexes/datetimes.py | 55 ++-- pandas/core/indexes/frozen.py | 8 +- pandas/core/indexes/interval.py | 102 +++----- pandas/core/indexes/multi.py | 41 ++- pandas/core/indexes/numeric.py | 30 ++- pandas/core/indexing.py | 36 ++- pandas/core/internals/blocks.py | 64 +++-- pandas/core/internals/construction.py | 13 +- pandas/core/internals/managers.py | 28 ++- pandas/core/ops/mask_ops.py | 2 +- pandas/core/resample.py | 26 +- pandas/core/reshape/merge.py | 26 +- pandas/core/strings.py | 45 ++-- pandas/io/common.py | 73 ++++-- pandas/io/excel/_base.py | 18 +- pandas/io/feather_format.py | 13 +- pandas/io/formats/console.py | 17 +- pandas/io/formats/csvs.py | 25 +- pandas/io/formats/excel.py | 4 +- pandas/io/formats/format.py | 6 +- pandas/io/formats/html.py | 4 +- pandas/io/gbq.py | 2 +- pandas/io/html.py | 10 +- pandas/io/json/_json.py | 16 +- pandas/io/parquet.py | 21 +- pandas/io/parsers.py | 51 ++-- pandas/io/pickle.py | 10 +- pandas/io/pytables.py | 234 +++++++++--------- pandas/io/sas/sasreader.py | 4 +- pandas/io/stata.py | 6 +- pandas/tests/dtypes/cast/test_infer_dtype.py | 29 +-- pandas/tests/extension/json/array.py | 2 +- pandas/tests/frame/methods/__init__.py | 7 - pandas/tests/frame/test_analytics.py | 106 ++++++++ pandas/tests/frame/{methods => }/test_asof.py | 0 
pandas/tests/frame/test_constructors.py | 16 -- pandas/tests/frame/test_cumulative.py | 120 --------- .../tests/frame/{methods => }/test_explode.py | 0 .../frame/{methods => }/test_quantile.py | 0 pandas/tests/frame/{methods => }/test_rank.py | 0 .../tests/frame/{methods => }/test_replace.py | 0 pandas/tests/frame/test_to_csv.py | 4 +- .../indexes/datetimes/test_date_range.py | 4 +- pandas/tests/indexes/datetimes/test_tools.py | 2 +- .../tests/indexes/timedeltas/test_indexing.py | 4 +- pandas/tests/io/parser/test_na_values.py | 6 +- pandas/tests/io/pytables/test_store.py | 4 +- pandas/tests/io/sas/test_sas7bdat.py | 4 +- pandas/tests/io/test_common.py | 29 ++- pandas/tests/io/test_compression.py | 6 +- pandas/tests/plotting/test_converter.py | 2 +- .../tests/scalar/timestamp/test_timestamp.py | 56 ++--- pandas/tests/series/methods/__init__.py | 7 - pandas/tests/series/test_analytics.py | 111 +++++++++ .../tests/series/{methods => }/test_asof.py | 0 pandas/tests/series/test_cumulative.py | 142 ----------- .../series/{methods => }/test_explode.py | 0 pandas/tests/series/test_io.py | 4 +- .../series/{methods => }/test_quantile.py | 0 .../tests/series/{methods => }/test_rank.py | 0 .../series/{methods => }/test_replace.py | 0 pandas/tests/test_strings.py | 24 +- pandas/tests/tseries/offsets/test_offsets.py | 44 ---- setup.cfg | 229 ----------------- setup.py | 15 +- 95 files changed, 955 insertions(+), 1336 deletions(-) create mode 100644 doc/source/_static/favicon.ico delete mode 100644 pandas/tests/frame/methods/__init__.py rename pandas/tests/frame/{methods => }/test_asof.py (100%) delete mode 100644 pandas/tests/frame/test_cumulative.py rename pandas/tests/frame/{methods => }/test_explode.py (100%) rename pandas/tests/frame/{methods => }/test_quantile.py (100%) rename pandas/tests/frame/{methods => }/test_rank.py (100%) rename pandas/tests/frame/{methods => }/test_replace.py (100%) delete mode 100644 pandas/tests/series/methods/__init__.py rename pandas/tests/series/{methods => }/test_asof.py (100%) delete mode 100644 pandas/tests/series/test_cumulative.py rename pandas/tests/series/{methods => }/test_explode.py (100%) rename pandas/tests/series/{methods => }/test_quantile.py (100%) rename pandas/tests/series/{methods => }/test_rank.py (100%) rename pandas/tests/series/{methods => }/test_replace.py (100%) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 0b68164e5767e..b91cfb3bed8cc 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -38,6 +38,6 @@ sh -c "$PYTEST_CMD" if [[ "$COVERAGE" && $? 
== 0 && "$TRAVIS_BRANCH" == "master" ]]; then echo "uploading coverage" - echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME" - bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME + echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME" + bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME fi diff --git a/doc/source/_static/favicon.ico b/doc/source/_static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..d15c4803b62e6dd2f706a5ebe1861fe438f5d98f GIT binary patch [base85-encoded favicon image data omitted] diff --git a/doc/source/conf.py b/doc/source/conf.py index 481c03ab8f388..096f1a63eddf8 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -204,11 +204,7 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -html_theme_options = { - "external_links": [], - "github_url": "https://github.com/pandas-dev/pandas", - "twitter_url": "https://twitter.com/pandas_dev", -} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = ["themes"] @@ -232,7 +228,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = "../../web/pandas/static/img/favicon.ico" +html_favicon = os.path.join(html_static_path[0], "favicon.ico") # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c32b009948fda..ae0f02312e1df 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns .. ipython:: python df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) - df.to_parquet(path='test', engine='pyarrow', + df.to_parquet(fname='test', engine='pyarrow', partition_cols=['a'], compression=None) -The `path` specifies the parent directory to which data will be saved. +The `fname` specifies the parent directory to which data will be saved. The `partition_cols` are the column names by which the dataset will be partitioned. Columns are partitioned in the order they are given. The partition splits are determined by the unique values in the partition columns. @@ -4828,6 +4828,7 @@ See also some :ref:`cookbook examples ` for some advanced strategi The key functions are: ..
autosummary:: + :toctree: ../reference/api/ read_sql_table read_sql_query diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 53c7a7437d55f..ff0474dbecbb4 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -74,7 +74,6 @@ These are places where the behavior of ``StringDtype`` objects differ from l. For ``StringDtype``, :ref:`string accessor methods` that return **numeric** output will always return a nullable integer dtype, rather than either int or float dtype, depending on the presence of NA values. - Methods returning **boolean** output will return a nullable boolean dtype. .. ipython:: python @@ -90,13 +89,7 @@ l. For ``StringDtype``, :ref:`string accessor methods` s.astype(object).str.count("a") s.astype(object).dropna().str.count("a") - When NA values are present, the output dtype is float64. Similarly for - methods returning boolean values. - - .. ipython:: python - - s.str.isdigit() - s.str.match("a") + When NA values are present, the output dtype is float64. 2. Some string methods, like :meth:`Series.str.decode` are not available on ``StringArray`` because ``StringArray`` only holds strings, not diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1272b0de27978..c978a1825a390 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -206,7 +206,6 @@ Other enhancements now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) -- DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) Build Changes @@ -255,10 +254,10 @@ To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. mi2 = mi.set_names("new name", level=0) mi2.names -New repr for :class:`~pandas.arrays.IntervalArray` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +New repr for :class:`pandas.core.arrays.IntervalArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- :class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) +- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) *pandas 0.25.x* @@ -502,7 +501,6 @@ Deprecations - :func:`pandas.json_normalize` is now exposed in the top-level namespace. Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). -- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). - @@ -580,7 +578,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) - :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors no longer allow ``start``, ``end``, and ``periods`` keywords, use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) - :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors no longer have a ``verify_integrity`` keyword argument (:issue:`23919`) -- ``pandas.core.internals.blocks.make_block`` no longer accepts the "fastpath" keyword(:issue:`19265`) +- :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword(:issue:`19265`) - :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) - Removed the previously deprecated :meth:`MultiIndex.to_hierarchical` (:issue:`21613`) @@ -657,7 +655,7 @@ Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) -- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) +- Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`) - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) - Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) - Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) @@ -713,7 +711,7 @@ Datetimelike - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) -- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Timedelta ^^^^^^^^^ @@ -760,7 +758,7 @@ Interval ^^^^^^^^ - Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`) -- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`) +- Indexing ^^^^^^^^ diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 6844df495547a..9e74eb46f7b1f 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -57,10 +57,10 @@ DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb") -# holds deprecated option metadata +# holds deprecated option metdata _deprecated_options: Dict[str, DeprecatedOption] = {} -# holds registered option metadata +# holds registered option metdata _registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index abb8a6d388d26..9e5fa75ebeceb 100644 --- a/pandas/_libs/groupby.pyx +++ 
b/pandas/_libs/groupby.pyx @@ -791,7 +791,7 @@ def group_quantile(ndarray[float64_t] out, out[i] = NaN else: # Calculate where to retrieve the desired value - # Casting to int will intentionally truncate result + # Casting to int will intentionaly truncate result idx = grp_start + (q * (non_na_sz - 1)) val = values[sort_arr[idx]] diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index ac8172146d351..0ed48efb03035 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -288,7 +288,7 @@ cdef class IndexEngine: def get_indexer_non_unique(self, targets): """ - Return an indexer suitable for taking from a non unique index + Return an indexer suitable for takng from a non unique index return the labels in the same order ast the target and a missing indexer into the targets (which correspond to the -1 indices in the results diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3f578a453aa1d..e9a486894fbf0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -510,7 +510,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): @cython.boundscheck(False) def array_equivalent_object(left: object[:], right: object[:]) -> bool: """ - Perform an element by element comparison on 1-d object arrays + Perform an element by element comparion on 1-d object arrays taking into account nan positions. """ cdef: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 1b566af7a5437..bb1493280dfd2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1367,26 +1367,7 @@ def _ensure_encoded(list lst): # common NA values # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', -STR_NA_VALUES = { - "-1.#IND", - "1.#QNAN", - "1.#IND", - "-1.#QNAN", - "#N/A N/A", - "#N/A", - "N/A", - "n/a", - "NA", - "#NA", - "NULL", - "null", - "NaN", - "-NaN", - "nan", - "-nan", - "", -} -_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES)) +_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES)) def _maybe_upcast(arr): diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h index bcf6350aa9090..77ec519cc24da 100644 --- a/pandas/_libs/src/klib/khash.h +++ b/pandas/_libs/src/klib/khash.h @@ -498,7 +498,7 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) */ #define kh_n_buckets(h) ((h)->n_buckets) -/* More convenient interfaces */ +/* More conenient interfaces */ /*! 
@function @abstract Instantiate a hash set containing integer keys diff --git a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c index 26b00c0cacd31..a847b0f5d5102 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsondec.c +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -150,7 +150,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { case '7': case '8': case '9': { - // FIXME: Check for arithmetic overflow here + // FIXME: Check for arithemtic overflow here // PERF: Don't do 64-bit arithmetic here unless we know we have // to intValue = intValue * 10ULL + (JSLONG)(chr - 48); @@ -235,7 +235,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_FRC_LOOP: - // FIXME: Check for arithmetic overflow here + // FIXME: Check for arithemtic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( @@ -282,7 +282,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { } BREAK_EXP_LOOP: - // FIXME: Check for arithmetic overflow here + // FIXME: Check for arithemtic overflow here ds->lastType = JT_DOUBLE; ds->start = offset; return ds->dec->newDouble( diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 37e9c36a85327..5d17d3a2d7bcb 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -1632,7 +1632,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, sprintf(buf, "%" NPY_INT64_FMT, value); len = strlen(cLabel); } - } else { // Fallback to string representation + } else { // Fallack to string representation PyObject *str = PyObject_Str(item); if (str == NULL) { Py_DECREF(item); diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 86a9d053730b8..e4e7f65db8dea 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -324,7 +324,7 @@ class Timestamp(_Timestamp): Function is not implemented. Use pd.to_datetime(). """ - raise NotImplementedError("Timestamp.strptime() is not implemented." + raise NotImplementedError("Timestamp.strptime() is not implmented." "Use to_datetime() to parse date strings.") @classmethod @@ -336,22 +336,11 @@ class Timestamp(_Timestamp): """ return cls(datetime.combine(date, time)) - def __new__( - cls, - object ts_input=_no_input, - object freq=None, - tz=None, - unit=None, - year=None, - month=None, - day=None, - hour=None, - minute=None, - second=None, - microsecond=None, - nanosecond=None, - tzinfo=None - ): + def __new__(cls, object ts_input=_no_input, + object freq=None, tz=None, unit=None, + year=None, month=None, day=None, + hour=None, minute=None, second=None, microsecond=None, + nanosecond=None, tzinfo=None): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. # @@ -412,8 +401,8 @@ class Timestamp(_Timestamp): freq = None if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: - raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " - "the tz parameter. Use tz_convert instead.") + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the" + " tz parameter. 
Use tz_convert instead.") ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 10669b09cefec..e41f2a840d151 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -794,7 +794,9 @@ def _add_offset(self, offset): values = self.tz_localize(None) else: values = self - result = offset.apply_index(values).tz_localize(self.tz) + result = offset.apply_index(values) + if self.tz is not None: + result = result.tz_localize(self.tz) except NotImplementedError: warnings.warn( @@ -802,9 +804,6 @@ def _add_offset(self, offset): PerformanceWarning, ) result = self.astype("O") + offset - if len(self) == 0: - # _from_sequence won't be able to infer self.tz - return type(self)._from_sequence(result).tz_localize(self.tz) return type(self)._from_sequence(result, freq="infer") diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index ce16a1620eed5..935f657416396 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -290,7 +290,7 @@ def update_dtype(self, dtype): Returns ------- SparseDtype - A new SparseDtype with the correct `dtype` and fill value + A new SparseDtype with the corret `dtype` and fill value for that `dtype`. Raises diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index de254f662bb32..0d30aa06cd466 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -86,7 +86,7 @@ def __from_arrow__(self, array): results = [] for arr in chunks: - # using _from_sequence to ensure None is converted to NA + # using _from_sequence to ensure None is convered to NA str_arr = StringArray._from_sequence(np.array(arr)) results.append(str_arr) @@ -153,7 +153,7 @@ class StringArray(PandasArray): ... ValueError: StringArray requires an object-dtype ndarray of strings. - For comparison methods, this returns a :class:`pandas.BooleanArray` + For comparision methods, this returns a :class:`pandas.BooleanArray` >>> pd.array(["a", None, "c"], dtype="string") == "a" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1ab21f18f3bdc..b398a197a4bc0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -41,7 +41,7 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from .dtypes import DatetimeTZDtype, ExtensionDtype, IntervalDtype, PeriodDtype +from .dtypes import DatetimeTZDtype, ExtensionDtype, PeriodDtype from .generic import ( ABCDataFrame, ABCDatetimeArray, @@ -601,9 +601,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False): if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) val = val.ordinal - elif lib.is_interval(val): - subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] - dtype = IntervalDtype(subtype=subtype) return dtype, val diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 8e629896fdb7b..602d7d0da95e6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -193,7 +193,9 @@ def ensure_python_int(value: Union[int, np.integer]) -> int: TypeError: if the value isn't an int or can't be converted to one. 
""" if not is_scalar(value): - raise TypeError(f"Value needs to be a scalar value, was type {type(value)}") + raise TypeError( + "Value needs to be a scalar value, was type {}".format(type(value)) + ) msg = "Wrong type {} for value {}" try: new_value = int(value) @@ -1857,7 +1859,7 @@ def _validate_date_like_dtype(dtype) -> None: try: typ = np.datetime_data(dtype)[0] except ValueError as e: - raise TypeError(e) + raise TypeError("{error}".format(error=e)) if typ != "generic" and typ != "ns": raise ValueError( f"{repr(dtype.name)} is too specific of a frequency, " @@ -1898,7 +1900,7 @@ def pandas_dtype(dtype): npdtype = np.dtype(dtype) except SyntaxError: # np.dtype uses `eval` which can raise SyntaxError - raise TypeError(f"data type '{dtype}' not understood") + raise TypeError("data type '{}' not understood".format(dtype)) # Any invalid dtype (such as pd.Timestamp) should raise an error. # np.dtype(invalid_type).kind = 0 for such objects. However, this will @@ -1910,6 +1912,6 @@ def pandas_dtype(dtype): # here and `dtype` is an array return npdtype elif npdtype.kind == "O": - raise TypeError(f"dtype '{dtype}' not understood") + raise TypeError("dtype '{}' not understood".format(dtype)) return npdtype diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6f8f6e8abbc0a..77ec182be5ed4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -749,7 +749,7 @@ def construct_from_string(cls, string: str_type): raise TypeError("Cannot construct a 'DatetimeTZDtype'") def __str__(self) -> str_type: - return f"datetime64[{self.unit}, {self.tz}]" + return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) @property def name(self) -> str_type: @@ -890,7 +890,7 @@ def __str__(self) -> str_type: @property def name(self) -> str_type: - return f"period[{self.freq.freqstr}]" + return "period[{freq}]".format(freq=self.freq.freqstr) @property def na_value(self): @@ -1054,7 +1054,8 @@ def construct_from_string(cls, string): if its not possible """ if not isinstance(string, str): - raise TypeError(f"a string needs to be passed, got type {type(string)}") + msg = "a string needs to be passed, got type {typ}" + raise TypeError(msg.format(typ=type(string))) if string.lower() == "interval" or cls._match.search(string) is not None: return cls(string) @@ -1074,7 +1075,7 @@ def type(self): def __str__(self) -> str_type: if self.subtype is None: return "interval" - return f"interval[{self.subtype}]" + return "interval[{subtype}]".format(subtype=self.subtype) def __hash__(self) -> int: # make myself hashable diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 766437dbad8f8..b699961cf07e8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -37,12 +37,7 @@ from pandas._libs import algos as libalgos, lib from pandas.compat.numpy import function as nv -from pandas.util._decorators import ( - Appender, - Substitution, - deprecate_kwarg, - rewrite_axis_style_signature, -) +from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature from pandas.util._validators import ( validate_axis_style_args, validate_bool_kwarg, @@ -455,7 +450,7 @@ def __init__( # For data is list-like, or Iterable (will consume into list) elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)): - if not isinstance(data, (abc.Sequence, ExtensionArray)): + if not isinstance(data, abc.Sequence): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: @@ -1745,7 +1740,7 @@ def 
to_records(self, index=True, column_dtypes=None, index_dtypes=None): rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], dtype=[('I', 'S2'), ('A', '>> index_dtypes = f">> index_dtypes = ">> df.to_records(index_dtypes=index_dtypes) rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], dtype=[('I', 'S1'), ('A', ' None: ) msg = ( - f"'{key}' is both {level_article} {level_type} level and " - f"{label_article} {label_type} label, which is ambiguous." + "'{key}' is both {level_article} {level_type} level and " + "{label_article} {label_type} label, which is ambiguous." + ).format( + key=key, + level_article=level_article, + level_type=level_type, + label_article=label_article, + label_type=label_type, ) raise ValueError(msg) @@ -1718,8 +1731,12 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: label_axis_name = "column" if axis == 0 else "index" raise ValueError( ( - f"The {label_axis_name} label '{key}' " - f"is not unique.{multi_message}" + "The {label_axis_name} label '{key}' " + "is not unique.{multi_message}" + ).format( + key=key, + label_axis_name=label_axis_name, + multi_message=multi_message, ) ) @@ -1763,8 +1780,8 @@ def _drop_labels_or_levels(self, keys, axis: int = 0): raise ValueError( ( "The following keys are not valid labels or " - f"levels for axis {axis}: {invalid_keys}" - ) + "levels for axis {axis}: {invalid_keys}" + ).format(axis=axis, invalid_keys=invalid_keys) ) # Compute levels and labels to drop @@ -1981,7 +1998,7 @@ def __setstate__(self, state): def __repr__(self) -> str: # string representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) - prepr = f"[{','.join(map(pprint_thing, self))}]" + prepr = "[%s]" % ",".join(map(pprint_thing, self)) return f"{type(self).__name__}({prepr})" def _repr_latex_(self): @@ -3929,13 +3946,13 @@ def _drop_axis(self, labels, axis, level=None, errors: str = "raise"): # GH 18561 MultiIndex.drop should raise if label is absent if errors == "raise" and indexer.all(): - raise KeyError(f"{labels} not found in axis") + raise KeyError("{} not found in axis".format(labels)) else: indexer = ~axis.isin(labels) # Check if label doesn't exist along axis labels_missing = (axis.get_indexer_for(labels) == -1).any() if errors == "raise" and labels_missing: - raise KeyError(f"{labels} not found in axis") + raise KeyError("{} not found in axis".format(labels)) slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer @@ -4459,7 +4476,7 @@ def reindex(self, *args, **kwargs): if kwargs: raise TypeError( "reindex() got an unexpected keyword " - f'argument "{list(kwargs.keys())[0]}"' + 'argument "{0}"'.format(list(kwargs.keys())[0]) ) self._consolidate_inplace() @@ -5980,7 +5997,7 @@ def fillna( raise TypeError( '"value" parameter must be a scalar, dict ' "or Series, but you passed a " - f'"{type(value).__name__}"' + '"{0}"'.format(type(value).__name__) ) new_data = self._data.fillna( @@ -6764,9 +6781,9 @@ def interpolate( if method not in methods and not is_numeric_or_datetime: raise ValueError( "Index column must be numeric or datetime type when " - f"using {method} method other than linear. " + "using {method} method other than linear. " "Try setting a numeric or datetime index column before " - "interpolating." 
+ "interpolating.".format(method=method) ) if isna(index).any(): @@ -9188,7 +9205,7 @@ def _tz_convert(ax, tz): ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): - raise ValueError(f"The level {level} is not valid") + raise ValueError("The level {0} is not valid".format(level)) ax = _tz_convert(ax, tz) result = self._constructor(self._data, copy=copy) @@ -9358,7 +9375,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent): ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): - raise ValueError(f"The level {level} is not valid") + raise ValueError("The level {0} is not valid".format(level)) ax = _tz_localize(ax, tz, ambiguous, nonexistent) result = self._constructor(self._data, copy=copy) @@ -10340,8 +10357,8 @@ def last_valid_index(self): def _doc_parms(cls): """Return a tuple of the doc parms.""" - axis_descr = ( - f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}" + axis_descr = "{%s}" % ", ".join( + "{0} ({1})".format(a, i) for i, a in enumerate(cls._AXIS_ORDERS) ) name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" name2 = cls.__name__ diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 700d8d503d086..e088400b25f0f 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -1,5 +1,5 @@ """ -Provide basic components for groupby. These definitions +Provide basic components for groupby. These defintiions hold the whitelist of methods that are exposed on the SeriesGroupBy and the DataFrameGroupBy objects. """ diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index ac1b0ab766a03..433bca940c028 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -144,9 +144,10 @@ def validate_indices(indices: np.ndarray, n: int) -> None: if len(indices): min_idx = indices.min() if min_idx < -1: - raise ValueError( - f"'indices' contains values less than allowed ({min_idx} < -1)" + msg = "'indices' contains values less than allowed ({} < {})".format( + min_idx, -1 ) + raise ValueError(msg) max_idx = indices.max() if max_idx >= n: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index db774a03c02f8..ae27aad3dda08 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -26,7 +26,8 @@ class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): def __init__(self, data, orig): if not isinstance(data, ABCSeries): raise TypeError( - f"cannot convert an object of type {type(data)} to a datetimelike index" + f"cannot convert an object of type {type(data)} to a " + "datetimelike index" ) self._parent = data @@ -90,8 +91,9 @@ def _delegate_property_get(self, name): def _delegate_property_set(self, name, value, *args, **kwargs): raise ValueError( - "modifications to a property of a datetimelike object are not supported. " - "Change values on the original." + "modifications to a property of a datetimelike " + "object are not supported. Change values on the " + "original." ) def _delegate_method(self, name, *args, **kwargs): @@ -220,7 +222,7 @@ def to_pytimedelta(self): Returns ------- - numpy.ndarray + a : numpy.ndarray Array of 1D containing data with `datetime.timedelta` type. 
See Also @@ -312,7 +314,8 @@ def __new__(cls, data): if not isinstance(data, ABCSeries): raise TypeError( - f"cannot convert an object of type {type(data)} to a datetimelike index" + f"cannot convert an object of type {type(data)} to a " + "datetimelike index" ) orig = data if is_categorical_dtype(data) else None diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 1904456848396..e99ae96f35315 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -124,6 +124,7 @@ def _get_combined_index( ------- Index """ + # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: @@ -272,6 +273,7 @@ def get_consensus_names(indexes): list A list representing the consensus 'names' found. """ + # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = {tuple(i.names) for i in indexes if com.any_not_none(*i.names)} diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7a238daeca9..5abd049b9564c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -804,10 +804,11 @@ def _assert_take_fillable( # only fill if we are passing a non-None fill_value if allow_fill and fill_value is not None: if (indices < -1).any(): - raise ValueError( + msg = ( "When allow_fill=True and fill_value is not None, " "all indices must be >= -1" ) + raise ValueError(msg) taken = algos.take( values, indices, allow_fill=allow_fill, fill_value=na_value ) @@ -1323,7 +1324,8 @@ def set_names(self, names, level=None, inplace=False): raise ValueError("Level must be None for non-MultiIndex") if level is not None and not is_list_like(level) and is_list_like(names): - raise TypeError("Names must be a string when a single level is provided.") + msg = "Names must be a string when a single level is provided." + raise TypeError(msg) if not is_list_like(names) and level is None and self.nlevels > 1: raise TypeError("Must pass list-like as `names`.") @@ -1419,8 +1421,8 @@ def _validate_index_level(self, level): if isinstance(level, int): if level < 0 and level != -1: raise IndexError( - "Too many levels: Index has only 1 level, " - f"{level} is not a valid level number" + f"Too many levels: Index has only 1 level," + f" {level} is not a valid level number" ) elif level > 0: raise IndexError( diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d35117b8db86e..44478d00da9cf 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -715,7 +715,9 @@ def _convert_list_indexer(self, keyarr, kind=None): indexer = self.categories.get_indexer(np.asarray(keyarr)) if (indexer == -1).any(): raise KeyError( - "a list-indexer must only include values that are in the categories" + "a list-indexer must only " + "include values that are " + "in the categories" ) return self.get_indexer(keyarr) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3bf6dce00a031..50dbddec5c8b2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -2,7 +2,7 @@ Base and utility classes for tseries type pandas objects. """ import operator -from typing import List, Set +from typing import Set import numpy as np @@ -73,7 +73,7 @@ def method(self, other): class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ - Common ops mixin to support a unified interface datetimelike Index. 
+ common ops mixin to support a unified interface datetimelike Index """ _data: ExtensionArray @@ -336,7 +336,7 @@ def _convert_tolerance(self, tolerance, target): raise ValueError("list-like tolerance size must match target index size") return tolerance - def tolist(self) -> List: + def tolist(self): """ Return a list of the underlying data. """ @@ -661,12 +661,11 @@ def _summary(self, name=None): Parameters ---------- name : str - Name to use in the summary representation. + name to use in the summary representation Returns ------- - str - Summarized representation of the index. + String with a summarized representation of the index """ formatter = self._formatter_func if len(self) > 0: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1fd962dd24656..523c434cb7377 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -45,10 +45,9 @@ def _new_DatetimeIndex(cls, d): - """ - This is called upon unpickling, rather than the default which doesn't - have arguments and breaks __new__ - """ + """ This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ """ + if "data" in d and not isinstance(d["data"], DatetimeIndex): # Avoid need to verify integrity by calling simple_new directly data = d.pop("data") @@ -101,9 +100,9 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin): Parameters ---------- - data : array-like (1-dimensional), optional + data : array-like (1-dimensional), optional Optional datetime-like data to construct index with. - copy : bool + copy : bool Make a copy of input ndarray. freq : str or pandas offset object, optional One of pandas date offset strings or corresponding objects. The string @@ -274,7 +273,7 @@ def __new__( @classmethod def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): """ - We require the we have a dtype compat for the values + we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ if isinstance(values, DatetimeArray): @@ -346,13 +345,7 @@ def tz(self, value): @cache_readonly def _is_dates_only(self) -> bool: - """ - Return a boolean if we are only dates (and don't have a timezone) - - Returns - ------- - bool - """ + """Return a boolean if we are only dates (and don't have a timezone)""" from pandas.io.formats.format import _is_dates_only return _is_dates_only(self.values) and self.tz is None @@ -367,9 +360,7 @@ def __reduce__(self): return _new_DatetimeIndex, (type(self), d), None def __setstate__(self, state): - """ - Necessary for making this object picklable. - """ + """Necessary for making this object picklable""" if isinstance(state, dict): super().__setstate__(state) @@ -402,9 +393,7 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ def _convert_for_op(self, value): - """ - Convert value to be insertable to ndarray. - """ + """ Convert value to be insertable to ndarray """ if self._has_same_tz(value): return _to_M8(value) raise ValueError("Passed item and index have different timezone") @@ -472,7 +461,7 @@ def _union(self, other, sort): def union_many(self, others): """ - A bit of a hack to accelerate unioning a collection of indexes. 
+ A bit of a hack to accelerate unioning a collection of indexes """ this = self @@ -500,7 +489,7 @@ def union_many(self, others): this._data._dtype = dtype return this - def _can_fast_union(self, other) -> bool: + def _can_fast_union(self, other): if not isinstance(other, DatetimeIndex): return False @@ -592,7 +581,7 @@ def intersection(self, other, sort=False): Returns ------- - Index or DatetimeIndex or TimedeltaIndex + y : Index or DatetimeIndex or TimedeltaIndex """ return super().intersection(other, sort=sort) @@ -710,9 +699,7 @@ def snap(self, freq="S"): # we know it conforms; skip check return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq) - def join( - self, other, how: str = "left", level=None, return_indexers=False, sort=False - ): + def join(self, other, how="left", level=None, return_indexers=False, sort=False): """ See Index.join """ @@ -853,8 +840,9 @@ def _parsed_string_to_bounds(self, reso, parsed): if parsed.tzinfo is not None: if self.tz is None: raise ValueError( - "The index must be timezone aware when indexing " - "with a date string with a UTC offset" + "The index must be timezone aware " + "when indexing with a date string with a " + "UTC offset" ) start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) @@ -863,16 +851,7 @@ def _parsed_string_to_bounds(self, reso, parsed): end = end.tz_localize(self.tz) return start, end - def _partial_date_slice( - self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True - ): - """ - Parameters - ---------- - reso : str - use_lhs : bool, default True - use_rhs : bool, default True - """ + def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic if ( is_monotonic diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 909643d50e9d7..fd8ab74ed4920 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -35,7 +35,7 @@ def union(self, other) -> "FrozenList": Returns ------- - FrozenList + diff : FrozenList The collection difference between self and other. """ if isinstance(other, tuple): @@ -53,7 +53,7 @@ def difference(self, other) -> "FrozenList": Returns ------- - FrozenList + diff : FrozenList The collection difference between self and other. """ other = set(other) @@ -92,9 +92,7 @@ def __hash__(self): return hash(tuple(self)) def _disabled(self, *args, **kwargs): - """ - This method will not function because object is immutable. 
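# Illustrative example (not part of the patch): FrozenList._disabled in the
# hunk above is what keeps MultiIndex.names immutable -- item assignment
# raises TypeError instead of mutating in place.
import pandas as pd

mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["letter", "num"])
print(mi.names)          # FrozenList(['letter', 'num'])
try:
    mi.names[0] = "x"    # __setitem__ is one of the disabled mutating ops
except TypeError as err:
    print(err)           # 'FrozenList' does not support mutable operations.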
- """ + """This method will not function because object is immutable.""" raise TypeError(f"'{type(self).__name__}' does not support mutable operations.") def __str__(self) -> str: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index dee4c959f8c90..f046f0d89c428 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -143,19 +143,21 @@ def func(intvidx_self, other, sort=False): result = result.astype(intvidx_self.dtype) return result elif intvidx_self.closed != other.closed: - raise ValueError( + msg = ( "can only do set operations between two IntervalIndex " "objects that are closed on the same side" ) + raise ValueError(msg) # GH 19016: ensure set op will not return a prohibited dtype subtypes = [intvidx_self.dtype.subtype, other.dtype.subtype] common_subtype = find_common_type(subtypes) if is_object_dtype(common_subtype): - raise TypeError( + msg = ( f"can only do {self.op_name} between two IntervalIndex " "objects that have compatible dtypes" ) + raise TypeError(msg) return setop(intvidx_self, other, sort) @@ -208,13 +210,7 @@ class IntervalIndex(IntervalMixin, Index): # Constructors def __new__( - cls, - data, - closed=None, - dtype=None, - copy: bool = False, - name=None, - verify_integrity: bool = True, + cls, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True ): if name is None and hasattr(data, "name"): @@ -267,9 +263,7 @@ def _simple_new(cls, array, name, closed=None): ), ) ) - def from_breaks( - cls, breaks, closed: str = "right", name=None, copy: bool = False, dtype=None - ): + def from_breaks(cls, breaks, closed="right", name=None, copy=False, dtype=None): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_breaks( breaks, closed=closed, copy=copy, dtype=dtype @@ -294,13 +288,7 @@ def from_breaks( ) ) def from_arrays( - cls, - left, - right, - closed: str = "right", - name=None, - copy: bool = False, - dtype=None, + cls, left, right, closed="right", name=None, copy=False, dtype=None ): with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray.from_arrays( @@ -325,9 +313,7 @@ def from_arrays( ), ) ) - def from_tuples( - cls, data, closed: str = "right", name=None, copy: bool = False, dtype=None - ): + def from_tuples(cls, data, closed="right", name=None, copy=False, dtype=None): with rewrite_exception("IntervalArray", cls.__name__): arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) return cls._simple_new(arr, name=name) @@ -343,9 +329,7 @@ def _shallow_copy(self, left=None, right=None, **kwargs): @cache_readonly def _isnan(self): - """ - Return a mask indicating if each value is NA. - """ + """Return a mask indicating if each value is NA""" if self._mask is None: self._mask = isna(self.left) return self._mask @@ -367,7 +351,7 @@ def __contains__(self, key) -> bool: Returns ------- - bool + boolean """ if not isinstance(key, Interval): return False @@ -486,9 +470,7 @@ def _ndarray_values(self) -> np.ndarray: return np.array(self._data) def __array__(self, result=None): - """ - The array interface, return my values. - """ + """ the array interface, return my values """ return self._ndarray_values def __array_wrap__(self, result, context=None): @@ -521,9 +503,7 @@ def astype(self, dtype, copy=True): @cache_readonly def dtype(self): - """ - Return the dtype object of the underlying data. 
- """ + """Return the dtype object of the underlying data""" return self._data.dtype @property @@ -571,7 +551,7 @@ def is_monotonic_decreasing(self) -> bool: @cache_readonly def is_unique(self): """ - Return True if the IntervalIndex contains unique elements, else False. + Return True if the IntervalIndex contains unique elements, else False """ left = self.left right = self.right @@ -728,7 +708,7 @@ def _needs_i8_conversion(self, key): Returns ------- - bool + boolean """ if is_interval_dtype(key) or isinstance(key, Interval): return self._needs_i8_conversion(key.left) @@ -749,7 +729,7 @@ def _maybe_convert_i8(self, key): Returns ------- - scalar or list-like + key: scalar or list-like The original key if no conversion occurred, int if converted scalar, Int64Index if converted list-like. """ @@ -795,21 +775,22 @@ def _check_method(self, method): return if method in ["bfill", "backfill", "pad", "ffill", "nearest"]: - raise NotImplementedError( - f"method {method} not yet implemented for IntervalIndex" - ) + msg = f"method {method} not yet implemented for IntervalIndex" + raise NotImplementedError(msg) raise ValueError("Invalid fill method") def _searchsorted_monotonic(self, label, side, exclude_label=False): if not self.is_non_overlapping_monotonic: raise KeyError( - "can only get slices from an IntervalIndex if bounds are " - "non-overlapping and all monotonic increasing or decreasing" + "can only get slices from an IntervalIndex if " + "bounds are non-overlapping and all monotonic " + "increasing or decreasing" ) if isinstance(label, IntervalMixin): - raise NotImplementedError("Interval objects are not currently supported") + msg = "Interval objects are not currently supported" + raise NotImplementedError(msg) # GH 20921: "not is_monotonic_increasing" for the second condition # instead of "is_monotonic_decreasing" to account for single element @@ -869,7 +850,7 @@ def get_loc( Returns ------- - int if unique index, slice if monotonic index, else mask + loc : int if unique index, slice if monotonic index, else mask Examples -------- @@ -952,10 +933,11 @@ def get_indexer( self._check_method(method) if self.is_overlapping: - raise InvalidIndexError( - "cannot handle overlapping indices; " - "use IntervalIndex.get_indexer_non_unique" + msg = ( + "cannot handle overlapping indices; use " + "IntervalIndex.get_indexer_non_unique" ) + raise InvalidIndexError(msg) target_as_index = ensure_index(target) @@ -1089,7 +1071,7 @@ def delete(self, loc): Returns ------- - IntervalIndex + new_index : IntervalIndex """ new_left = self.left.delete(loc) new_right = self.right.delete(loc) @@ -1108,7 +1090,7 @@ def insert(self, loc, item): Returns ------- - IntervalIndex + new_index : IntervalIndex """ if isinstance(item, Interval): if item.closed != self.closed: @@ -1135,10 +1117,11 @@ def _concat_same_dtype(self, to_concat, name): we allow a 0-len index here as well """ if not len({i.closed for i in to_concat if len(i)}) == 1: - raise ValueError( + msg = ( "can only append two IntervalIndex objects " "that are closed on the same side" ) + raise ValueError(msg) return super()._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) @@ -1192,13 +1175,10 @@ def _format_data(self, name=None): n = min(max_seq_items // 2, 10) head = [formatter(x) for x in self[:n]] tail = [formatter(x) for x in self[-n:]] - head_joined = ", ".join(head) - tail_joined = ", ".join(tail) - summary = f"[{head_joined} ... {tail_joined}]" + summary = f"[{', '.join(head)} ... 
{', '.join(tail)}]" else: tail = [formatter(x) for x in self] - joined = ", ".join(tail) - summary = f"[{joined}]" + summary = f"[{', '.join(tail)}]" return summary + "," + self._format_space() @@ -1209,7 +1189,7 @@ def _format_attrs(self): attrs.append(("dtype", f"'{self.dtype}'")) return attrs - def _format_space(self) -> str: + def _format_space(self): space = " " * (len(type(self).__name__) + 1) return f"\n{space}" @@ -1220,7 +1200,7 @@ def argsort(self, *args, **kwargs): def equals(self, other) -> bool: """ - Determines if two IntervalIndex objects contain the same elements. + Determines if two IntervalIndex objects contain the same elements """ if self.is_(other): return True @@ -1308,7 +1288,7 @@ def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex": Returns ------- - IntervalIndex + taken : IntervalIndex """ lindexer = self.left.get_indexer(other.left) rindexer = self.right.get_indexer(other.right) @@ -1330,7 +1310,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": Returns ------- - IntervalIndex + taken : IntervalIndex """ mask = np.zeros(len(self), dtype=bool) @@ -1380,9 +1360,7 @@ def is_all_dates(self) -> bool: def _is_valid_endpoint(endpoint) -> bool: - """ - Helper for interval_range to check if start/end are valid types. - """ + """helper for interval_range to check if start/end are valid types""" return any( [ is_number(endpoint), @@ -1394,9 +1372,7 @@ def _is_valid_endpoint(endpoint) -> bool: def _is_type_compatible(a, b) -> bool: - """ - Helper for interval_range to check type compat of start/end/freq. - """ + """helper for interval_range to check type compat of start/end/freq""" is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) return ( diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 05a4da28eb0a1..9e434d0f5f704 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1,7 +1,6 @@ from collections import OrderedDict import datetime from sys import getsizeof -from typing import List, Optional import warnings import numpy as np @@ -86,7 +85,7 @@ def _codes_to_ints(self, codes): Returns ------- - scalar or 1-dimensional array, of dtype uint64 + int_keys : scalar or 1-dimensional array, of dtype uint64 Integer(s) representing one combination (each). """ # Shift the representation of each level by the pre-calculated number @@ -126,7 +125,7 @@ def _codes_to_ints(self, codes): Returns ------- - int, or 1-dimensional array of dtype object + int_keys : int, or 1-dimensional array of dtype object Integer(s) representing one combination (each). """ @@ -249,8 +248,8 @@ def __new__( dtype=None, copy=False, name=None, - verify_integrity: bool = True, - _set_identity: bool = True, + verify_integrity=True, + _set_identity=True, ): # compat with Index @@ -288,7 +287,7 @@ def __new__( return result - def _validate_codes(self, level: List, code: List): + def _validate_codes(self, level: list, code: list): """ Reassign code values as -1 if their corresponding levels are NaN. @@ -301,7 +300,7 @@ def _validate_codes(self, level: List, code: List): Returns ------- - new code where code value = -1 if it corresponds + code : new code where code value = -1 if it corresponds to a level with missing values (NaN, NaT, None). 
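# Illustrative example (not part of the patch): what _validate_codes above
# encodes -- a missing value in a level is stored as code -1, not as a
# regular category position.
import numpy as np
import pandas as pd

mi = pd.MultiIndex.from_arrays([[np.nan, "a", "a"], [1, 2, 3]])
print(mi.levels[0])    # Index(['a'], dtype='object') -- NaN is not a level value
print(mi.codes[0])     # [-1, 0, 0]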
""" null_mask = isna(level) @@ -309,10 +308,9 @@ def _validate_codes(self, level: List, code: List): code = np.where(null_mask[code], -1, code) return code - def _verify_integrity( - self, codes: Optional[List] = None, levels: Optional[List] = None - ): + def _verify_integrity(self, codes=None, levels=None): """ + Parameters ---------- codes : optional list @@ -328,7 +326,7 @@ def _verify_integrity( Returns ------- - new codes where code value = -1 if it corresponds to a + codes : new codes where code value = -1 if it corresponds to a NaN level. """ # NOTE: Currently does not check, among other things, that cached @@ -338,8 +336,8 @@ def _verify_integrity( if len(levels) != len(codes): raise ValueError( - "Length of levels and codes must match. NOTE: " - "this index is in an inconsistent state." + "Length of levels and codes must match. NOTE:" + " this index is in an inconsistent state." ) codes_length = len(codes[0]) for i, (level, level_codes) in enumerate(zip(levels, codes)): @@ -391,7 +389,7 @@ def from_arrays(cls, arrays, sortorder=None, names=_no_default_names): Returns ------- - MultiIndex + index : MultiIndex See Also -------- @@ -456,7 +454,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): Returns ------- - MultiIndex + index : MultiIndex See Also -------- @@ -483,7 +481,8 @@ def from_tuples(cls, tuples, sortorder=None, names=None): if len(tuples) == 0: if names is None: - raise TypeError("Cannot infer number of levels from empty list") + msg = "Cannot infer number of levels from empty list" + raise TypeError(msg) arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): @@ -519,7 +518,7 @@ def from_product(cls, iterables, sortorder=None, names=_no_default_names): Returns ------- - MultiIndex + index : MultiIndex See Also -------- @@ -654,15 +653,15 @@ def array(self): ------ ValueError """ - raise ValueError( + msg = ( "MultiIndex has no single backing array. Use " "'MultiIndex.to_numpy()' to get a NumPy array of tuples." ) + raise ValueError(msg) @property def _is_homogeneous_type(self) -> bool: - """ - Whether the levels of a MultiIndex all have the same dtype. + """Whether the levels of a MultiIndex all have the same dtype. This looks at the dtypes of the levels. @@ -733,7 +732,7 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): Level(s) to set (None for all levels). inplace : bool If True, mutates in place. - verify_integrity : bool, default True + verify_integrity : bool (default True) If True, checks that levels and codes are compatible. Returns diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b84c69b8caf51..048bff46759bc 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -85,9 +85,8 @@ def _validate_dtype(cls, dtype: Dtype) -> None: validation_func, expected = validation_metadata[cls._typ] if not validation_func(dtype): - raise ValueError( - f"Incorrect `dtype` passed: expected {expected}, received {dtype}" - ) + msg = f"Incorrect `dtype` passed: expected {expected}, received {dtype}" + raise ValueError(msg) @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) def _maybe_cast_slice_bound(self, label, side, kind): @@ -107,6 +106,7 @@ def _convert_for_op(self, value): """ Convert value to be insertable to ndarray. 
""" + if is_bool(value) or is_bool_dtype(value): # force conversion to object # so we don't lose the bools @@ -121,13 +121,17 @@ def _convert_tolerance(self, tolerance, target): if not np.issubdtype(tolerance.dtype, np.number): if tolerance.ndim > 0: raise ValueError( - f"tolerance argument for {type(self).__name__} must contain " - "numeric elements if it is list type" + ( + f"tolerance argument for {type(self).__name__} must contain " + "numeric elements if it is list type" + ) ) else: raise ValueError( - f"tolerance argument for {type(self).__name__} must be numeric " - f"if it is a scalar: {repr(tolerance)}" + ( + f"tolerance argument for {type(self).__name__} must be numeric " + f"if it is a scalar: {repr(tolerance)}" + ) ) return tolerance @@ -240,9 +244,7 @@ class Int64Index(IntegerIndex): @property def inferred_type(self) -> str: - """ - Always 'integer' for ``Int64Index`` - """ + """Always 'integer' for ``Int64Index``""" return "integer" @property @@ -297,9 +299,7 @@ class UInt64Index(IntegerIndex): @property def inferred_type(self) -> str: - """ - Always 'integer' for ``UInt64Index`` - """ + """Always 'integer' for ``UInt64Index``""" return "integer" @property @@ -374,9 +374,7 @@ class Float64Index(NumericIndex): @property def inferred_type(self) -> str: - """ - Always 'floating' for ``Float64Index`` - """ + """Always 'floating' for ``Float64Index``""" return "floating" @Appender(_index_shared_docs["astype"]) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b86293e78a80d..b31973de5bca0 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -232,7 +232,7 @@ def _has_valid_tuple(self, key: Tuple): except ValueError: raise ValueError( "Location based indexing can only have " - f"[{self._valid_types}] types" + "[{types}] types".format(types=self._valid_types) ) def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: @@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: bool """ if isinstance(indexer, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") + raise IndexError("{0} cannot enlarge its target object".format(self.name)) else: if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) @@ -300,10 +300,13 @@ def _has_valid_positional_setitem_indexer(self, indexer) -> bool: elif is_integer(i): if i >= len(ax): raise IndexError( - f"{self.name} cannot enlarge its target object" + "{name} cannot enlarge its target " + "object".format(name=self.name) ) elif isinstance(i, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") + raise IndexError( + "{name} cannot enlarge its target object".format(name=self.name) + ) return True @@ -1163,14 +1166,17 @@ def _validate_read_indexer( if missing: if missing == len(indexer): - axis_name = self.obj._get_axis_name(axis) - raise KeyError(f"None of [{key}] are in the [{axis_name}]") + raise KeyError( + "None of [{key}] are in the [{axis}]".format( + key=key, axis=self.obj._get_axis_name(axis) + ) + ) # We (temporarily) allow for some missing keys with .loc, except in # some cases (e.g. 
setting) in which "raise_missing" will be False if not (self.name == "loc" and not raise_missing): not_found = list(set(key) - set(ax)) - raise KeyError(f"{not_found} not in index") + raise KeyError("{} not in index".format(not_found)) # we skip the warning on Categorical/Interval # as this check is actually done (check for @@ -1899,13 +1905,18 @@ def _validate_key(self, key, axis: int): # check that the key has a numeric dtype if not is_numeric_dtype(arr.dtype): - raise IndexError(f".iloc requires numeric indexers, got {arr}") + raise IndexError( + ".iloc requires numeric indexers, got {arr}".format(arr=arr) + ) # check that the key does not exceed the maximum size of the index if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): raise IndexError("positional indexers are out-of-bounds") else: - raise ValueError(f"Can only index by location with a [{self._valid_types}]") + raise ValueError( + "Can only index by location with " + "a [{types}]".format(types=self._valid_types) + ) def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) @@ -2052,7 +2063,10 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): self._validate_key(obj, axis) return obj except ValueError: - raise ValueError(f"Can only index by location with a [{self._valid_types}]") + raise ValueError( + "Can only index by location with " + "a [{types}]".format(types=self._valid_types) + ) class _ScalarAccessIndexer(_NDFrameIndexerBase): @@ -2313,7 +2327,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: # GH26658 if len(result) != len(index): raise IndexError( - f"Item wrong length {len(result)} instead of {len(index)}." + "Item wrong length {} instead of {}.".format(len(result), len(index)) ) return result diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index eb5b5181d894d..610a39a05148b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -115,8 +115,8 @@ def __init__(self, values, placement, ndim=None): if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values): raise ValueError( - f"Wrong number of items passed {len(self.values)}, " - f"placement implies {len(self.mgr_locs)}" + "Wrong number of items passed {val}, placement implies " + "{mgr}".format(val=len(self.values), mgr=len(self.mgr_locs)) ) def _check_ndim(self, values, ndim): @@ -144,10 +144,9 @@ def _check_ndim(self, values, ndim): ndim = values.ndim if self._validate_ndim and values.ndim != ndim: - raise ValueError( - "Wrong number of dimensions. " - f"values.ndim != ndim [{values.ndim} != {ndim}]" - ) + msg = "Wrong number of dimensions. 
values.ndim != ndim [{} != {}]" + raise ValueError(msg.format(values.ndim, ndim)) + return ndim @property @@ -185,7 +184,7 @@ def is_categorical_astype(self, dtype): if dtype is Categorical or dtype is CategoricalDtype: # this is a pd.Categorical, but is not # a valid type for astypeing - raise TypeError(f"invalid type {dtype} for astype") + raise TypeError("invalid type {0} for astype".format(dtype)) elif is_categorical_dtype(dtype): return True @@ -265,14 +264,18 @@ def __repr__(self) -> str: name = type(self).__name__ if self._is_single_block: - result = f"{name}: {len(self)} dtype: {self.dtype}" + result = "{name}: {len} dtype: {dtype}".format( + name=name, len=len(self), dtype=self.dtype + ) else: shape = " x ".join(pprint_thing(s) for s in self.shape) - result = ( - f"{name}: {pprint_thing(self.mgr_locs.indexer)}, " - f"{shape}, dtype: {self.dtype}" + result = "{name}: {index}, {shape}, dtype: {dtype}".format( + name=name, + index=pprint_thing(self.mgr_locs.indexer), + shape=shape, + dtype=self.dtype, ) return result @@ -326,7 +329,7 @@ def ftype(self): dtype = self.dtype.subtype else: dtype = self.dtype - return f"{dtype}:{self._ftype}" + return "{dtype}:{ftype}".format(dtype=dtype, ftype=self._ftype) def merge(self, other): return _merge_blocks([self, other]) @@ -541,15 +544,15 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): if errors not in errors_legal_values: invalid_arg = ( - "Expected value of kwarg 'errors' to be one of " - f"{list(errors_legal_values)}. Supplied value is '{errors}'" + "Expected value of kwarg 'errors' to be one of {}. " + "Supplied value is '{}'".format(list(errors_legal_values), errors) ) raise ValueError(invalid_arg) if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): msg = ( - f"Expected an instance of {dtype.__name__}, " - "but got the class instead. Try instantiating 'dtype'." + "Expected an instance of {}, but got the class instead. 
" + "Try instantiating 'dtype'.".format(dtype.__name__) ) raise TypeError(msg) @@ -610,9 +613,15 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): if newb.is_numeric and self.is_numeric: if newb.shape != self.shape: raise TypeError( - f"cannot set astype for copy = [{copy}] for dtype " - f"({self.dtype.name} [{self.shape}]) to different shape " - f"({newb.dtype.name} [{newb.shape}])" + "cannot set astype for copy = [{copy}] for dtype " + "({dtype} [{shape}]) to different shape " + "({newb_dtype} [{newb_shape}])".format( + copy=copy, + dtype=self.dtype.name, + shape=self.shape, + newb_dtype=newb.dtype.name, + newb_shape=newb.shape, + ) ) return newb @@ -649,7 +658,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): if not self.is_object and not quoting: itemsize = writers.word_len(na_rep) - values = values.astype(f" str: output = type(self).__name__ for i, ax in enumerate(self.axes): if i == 0: - output += f"\nItems: {ax}" + output += "\nItems: {ax}".format(ax=ax) else: - output += f"\nAxis {i}: {ax}" + output += "\nAxis {i}: {ax}".format(i=i, ax=ax) for block in self.blocks: - output += f"\n{pprint_thing(block)}" + output += "\n{block}".format(block=pprint_thing(block)) return output def _verify_integrity(self): @@ -336,8 +336,8 @@ def _verify_integrity(self): if len(self.items) != tot_items: raise AssertionError( "Number of manager items must equal union of " - f"block items\n# manager items: {len(self.items)}, # " - f"tot_items: {tot_items}" + "block items\n# manager items: {0}, # " + "tot_items: {1}".format(len(self.items), tot_items) ) def apply(self, f: str, filter=None, **kwargs): @@ -1140,7 +1140,7 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False): """ if not allow_duplicates and item in self.items: # Should this be a different kind of error?? - raise ValueError(f"cannot insert {item}, already exists") + raise ValueError("cannot insert {}, already exists".format(item)) if not isinstance(loc, int): raise TypeError("loc must be int") @@ -1661,7 +1661,9 @@ def construction_error(tot_items, block_shape, axes, e=None): raise e if block_shape[0] == 0: raise ValueError("Empty data passed with indices specified.") - raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + raise ValueError( + "Shape of passed values is {0}, indices imply {1}".format(passed, implied) + ) # ----------------------------------------------------------------------- @@ -1897,10 +1899,10 @@ def _compare_or_regex_search(a, b, regex=False): type_names = [type(a).__name__, type(b).__name__] if is_a_array: - type_names[0] = f"ndarray(dtype={a.dtype})" + type_names[0] = "ndarray(dtype={dtype})".format(dtype=a.dtype) if is_b_array: - type_names[1] = f"ndarray(dtype={b.dtype})" + type_names[1] = "ndarray(dtype={dtype})".format(dtype=b.dtype) raise TypeError( f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 8fb81faf313d7..fd91e78451da9 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -1,5 +1,5 @@ """ -Ops for masked arrays. +Ops for masked ararys. """ from typing import Optional, Union diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2294c846e81c7..67f06ea7bea6a 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -90,11 +90,13 @@ def __str__(self) -> str: Provide a nice str repr of our rolling object. 
""" attrs = ( - f"{k}={getattr(self.groupby, k)}" + "{k}={v}".format(k=k, v=getattr(self.groupby, k)) for k in self._attributes if getattr(self.groupby, k, None) is not None ) - return f"{type(self).__name__} [{', '.join(attrs)}]" + return "{klass} [{attrs}]".format( + klass=type(self).__name__, attrs=", ".join(attrs) + ) def __getattr__(self, attr): if attr in self._internal_names_set: @@ -1186,8 +1188,8 @@ def _downsample(self, how, **kwargs): return self.asfreq() raise IncompatibleFrequency( - f"Frequency {ax.freq} cannot be resampled to {self.freq}, " - "as they are not sub or super periods" + "Frequency {} cannot be resampled to {}, as they are not " + "sub or super periods".format(ax.freq, self.freq) ) def _upsample(self, method, limit=None, fill_value=None): @@ -1331,11 +1333,11 @@ def __init__( # Check for correctness of the keyword arguments which would # otherwise silently use the default if misspelled if label not in {None, "left", "right"}: - raise ValueError(f"Unsupported value {label} for `label`") + raise ValueError("Unsupported value {} for `label`".format(label)) if closed not in {None, "left", "right"}: - raise ValueError(f"Unsupported value {closed} for `closed`") + raise ValueError("Unsupported value {} for `closed`".format(closed)) if convention not in {None, "start", "end", "e", "s"}: - raise ValueError(f"Unsupported value {convention} for `convention`") + raise ValueError("Unsupported value {} for `convention`".format(convention)) freq = to_offset(freq) @@ -1405,7 +1407,7 @@ def _get_resampler(self, obj, kind=None): raise TypeError( "Only valid with DatetimeIndex, " "TimedeltaIndex or PeriodIndex, " - f"but got an instance of '{type(ax).__name__}'" + "but got an instance of '{typ}'".format(typ=type(ax).__name__) ) def _get_grouper(self, obj, validate=True): @@ -1418,7 +1420,7 @@ def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError( "axis must be a DatetimeIndex, but got " - f"an instance of {type(ax).__name__}" + "an instance of {typ}".format(typ=type(ax).__name__) ) if len(ax) == 0: @@ -1494,7 +1496,7 @@ def _get_time_delta_bins(self, ax): if not isinstance(ax, TimedeltaIndex): raise TypeError( "axis must be a TimedeltaIndex, but got " - f"an instance of {type(ax).__name__}" + "an instance of {typ}".format(typ=type(ax).__name__) ) if not len(ax): @@ -1519,7 +1521,7 @@ def _get_time_period_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError( "axis must be a DatetimeIndex, but got " - f"an instance of {type(ax).__name__}" + "an instance of {typ}".format(typ=type(ax).__name__) ) freq = self.freq @@ -1541,7 +1543,7 @@ def _get_period_bins(self, ax): if not isinstance(ax, PeriodIndex): raise TypeError( "axis must be a PeriodIndex, but got " - f"an instance of {type(ax).__name__}" + "an instance of {typ}".format(typ=type(ax).__name__) ) memb = ax.asfreq(self.freq, how=self.convention) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3dfd5fed34741..0fb029c8429a6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -116,20 +116,20 @@ def _groupby_and_merge( # if we can groupby the rhs # then we can get vastly better perf - - # we will check & remove duplicates if indicated - if check_duplicates: - if on is None: - on = [] - elif not isinstance(on, (list, tuple)): - on = [on] - - if right.duplicated(by + on).any(): - _right = right.drop_duplicates(by + on, keep="last") - # TODO: use overload to refine return type of drop_duplicates - assert _right is not None # needed 
for mypy - right = _right try: + + # we will check & remove duplicates if indicated + if check_duplicates: + if on is None: + on = [] + elif not isinstance(on, (list, tuple)): + on = [on] + + if right.duplicated(by + on).any(): + _right = right.drop_duplicates(by + on, keep="last") + # TODO: use overload to refine return type of drop_duplicates + assert _right is not None # needed for mypy + right = _right rby = right.groupby(by, sort=False) except KeyError: rby = None diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0ef39a685f1ce..24e2e674f6ae3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,7 @@ from functools import wraps import re import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Type, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List import warnings import numpy as np @@ -113,7 +113,7 @@ def cat_safe(list_of_columns: List, sep: str): raise TypeError( "Concatenation requires list-likes containing only " "strings (or missing values). Offending values found in " - f"column {dtype}" + "column {}".format(dtype) ) from None return result @@ -142,7 +142,7 @@ def _map_stringarray( The value to use for missing values. By default, this is the original value (NA). dtype : Dtype - The result dtype to use. Specifying this avoids an intermediate + The result dtype to use. Specifying this aviods an intermediate object-dtype allocation. Returns @@ -152,20 +152,14 @@ def _map_stringarray( an ndarray. """ - from pandas.arrays import IntegerArray, StringArray, BooleanArray + from pandas.arrays import IntegerArray, StringArray mask = isna(arr) assert isinstance(arr, StringArray) arr = np.asarray(arr) - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - + if is_integer_dtype(dtype): na_value_is_na = isna(na_value) if na_value_is_na: na_value = 1 @@ -175,13 +169,13 @@ def _map_stringarray( mask.view("uint8"), convert=False, na_value=na_value, - dtype=np.dtype(dtype), + dtype=np.dtype("int64"), ) if not na_value_is_na: mask[:] = False - return constructor(result, mask) + return IntegerArray(result, mask) elif is_string_dtype(dtype) and not is_object_dtype(dtype): # i.e. StringDtype @@ -189,6 +183,7 @@ def _map_stringarray( arr, func, mask.view("uint8"), convert=False, na_value=na_value ) return StringArray(result) + # TODO: BooleanArray else: # This is when the result type is object. We reach this when # -> We know the result type is truly object (e.g. 
.encode returns bytes @@ -304,7 +299,7 @@ def str_count(arr, pat, flags=0): """ regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype="int64") + return _na_map(f, arr, dtype=int) def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): @@ -1355,8 +1350,8 @@ def str_find(arr, sub, start=0, end=None, side="left"): """ if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) + msg = "expected a string object, not {0}" + raise TypeError(msg.format(type(sub).__name__)) if side == "left": method = "find" @@ -1370,13 +1365,13 @@ def str_find(arr, sub, start=0, end=None, side="left"): else: f = lambda x: getattr(x, method)(sub, start, end) - return _na_map(f, arr, dtype="int64") + return _na_map(f, arr, dtype=int) def str_index(arr, sub, start=0, end=None, side="left"): if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) + msg = "expected a string object, not {0}" + raise TypeError(msg.format(type(sub).__name__)) if side == "left": method = "index" @@ -1390,7 +1385,7 @@ def str_index(arr, sub, start=0, end=None, side="left"): else: f = lambda x: getattr(x, method)(sub, start, end) - return _na_map(f, arr, dtype="int64") + return _na_map(f, arr, dtype=int) def str_pad(arr, width, side="left", fillchar=" "): @@ -1447,15 +1442,15 @@ def str_pad(arr, width, side="left", fillchar=" "): dtype: object """ if not isinstance(fillchar, str): - msg = f"fillchar must be a character, not {type(fillchar).__name__}" - raise TypeError(msg) + msg = "fillchar must be a character, not {0}" + raise TypeError(msg.format(type(fillchar).__name__)) if len(fillchar) != 1: raise TypeError("fillchar must be a character, not str") if not is_integer(width): - msg = f"width must be of integer type, not {type(width).__name__}" - raise TypeError(msg) + msg = "width must be of integer type, not {0}" + raise TypeError(msg.format(type(width).__name__)) if side == "left": f = lambda x: x.rjust(width, fillchar) @@ -3215,7 +3210,7 @@ def rindex(self, sub, start=0, end=None): len, docstring=_shared_docs["len"], forbidden_types=None, - dtype="int64", + dtype=int, returns_string=False, ) diff --git a/pandas/io/common.py b/pandas/io/common.py index c62de76286610..a01011cd7d4e4 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,6 +2,7 @@ import bz2 import codecs +import csv import gzip from io import BufferedIOBase, BytesIO import mmap @@ -16,7 +17,9 @@ List, Mapping, Optional, + TextIO, Tuple, + Type, Union, ) from urllib.parse import ( # noqa @@ -44,6 +47,29 @@ lzma = _import_lzma() +# common NA values +# no longer excluding inf representations +# '1.#INF','-1.#INF', '1.#INF000000', +_NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A N/A", + "#N/A", + "N/A", + "n/a", + "NA", + "#NA", + "NULL", + "null", + "NaN", + "-NaN", + "nan", + "-nan", + "", +} + _VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS.discard("") @@ -61,7 +87,7 @@ def __next__(self): raise AbstractMethodError(self) -def is_url(url) -> bool: +def _is_url(url) -> bool: """ Check to see if a URL has a valid protocol. @@ -99,7 +125,7 @@ def _expand_user( return filepath_or_buffer -def validate_header_arg(header) -> None: +def _validate_header_arg(header) -> None: if isinstance(header, bool): raise TypeError( "Passing a bool to header is invalid. 
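# Illustrative example (not part of the patch): _validate_header_arg above
# rejects booleans -- header must be an int, a list of ints, or None.
import io
import pandas as pd

try:
    pd.read_csv(io.StringIO("a,b\n1,2"), header=True)
except TypeError as err:
    print(err)
print(pd.read_csv(io.StringIO("a,b\n1,2"), header=0))   # explicit row number instead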
" @@ -109,7 +135,7 @@ def validate_header_arg(header) -> None: ) -def stringify_path( +def _stringify_path( filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. @@ -190,9 +216,9 @@ def get_filepath_or_buffer( compression, str, should_close, bool) """ - filepath_or_buffer = stringify_path(filepath_or_buffer) + filepath_or_buffer = _stringify_path(filepath_or_buffer) - if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): + if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer): req = urlopen(filepath_or_buffer) content_encoding = req.headers.get("Content-Encoding", None) if content_encoding == "gzip": @@ -247,7 +273,7 @@ def file_path_to_url(path: str) -> str: _compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"} -def get_compression_method( +def _get_compression_method( compression: Optional[Union[str, Mapping[str, str]]] ) -> Tuple[Optional[str], Dict[str, str]]: """ @@ -280,7 +306,7 @@ def get_compression_method( return compression, compression_args -def infer_compression( +def _infer_compression( filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] ) -> Optional[str]: """ @@ -314,7 +340,7 @@ def infer_compression( # Infer compression if compression == "infer": # Convert all path types (e.g. pathlib.Path) to strings - filepath_or_buffer = stringify_path(filepath_or_buffer) + filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): # Cannot infer compression of a buffer, assume no compression return None @@ -335,7 +361,7 @@ def infer_compression( raise ValueError(msg) -def get_handle( +def _get_handle( path_or_buf, mode: str, encoding=None, @@ -393,12 +419,12 @@ def get_handle( f = path_or_buf # Convert pathlib.Path/py.path.local or string - path_or_buf = stringify_path(path_or_buf) + path_or_buf = _stringify_path(path_or_buf) is_path = isinstance(path_or_buf, str) - compression, compression_args = get_compression_method(compression) + compression, compression_args = _get_compression_method(compression) if is_path: - compression = infer_compression(path_or_buf, compression) + compression = _infer_compression(path_or_buf, compression) if compression: @@ -418,7 +444,7 @@ def get_handle( # ZIP Compression elif compression == "zip": - zf = _BytesZipFile(path_or_buf, mode, **compression_args) + zf = BytesZipFile(path_or_buf, mode, **compression_args) # Ensure the container is closed as well. handles.append(zf) if zf.mode == "w": @@ -469,7 +495,7 @@ def get_handle( if memory_map and hasattr(f, "fileno"): try: - wrapped = _MMapWrapper(f) + wrapped = MMapWrapper(f) f.close() f = wrapped except Exception: @@ -482,7 +508,7 @@ def get_handle( return f, handles -class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore +class BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore """ Wrapper for standard library class ZipFile and allow the returned file-like handle to accept byte strings via `write` method. @@ -515,7 +541,7 @@ def closed(self): return self.fp is None -class _MMapWrapper(BaseIterator): +class MMapWrapper(BaseIterator): """ Wrapper for the Python's mmap class so that it can be properly read in by Python's csv.reader class. 
@@ -534,7 +560,7 @@ def __init__(self, f: IO): def __getattr__(self, name: str): return getattr(self.mmap, name) - def __iter__(self) -> "_MMapWrapper": + def __iter__(self) -> "MMapWrapper": return self def __next__(self) -> str: @@ -571,3 +597,16 @@ def next(self) -> bytes: def close(self): self.reader.close() + + +# Keeping these class for now because it provides a necessary convenience +# for "dropping" the "encoding" argument from our I/O arguments when +# creating a Unicode I/O object. +def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): + return csv.reader(f, dialect=dialect, **kwds) + + +def UnicodeWriter( + f: TextIO, dialect: Type[csv.Dialect] = csv.excel, encoding: str = "utf-8", **kwds +): + return csv.writer(f, dialect=dialect, **kwds) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 553334407d12e..81d3d46f78bdb 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -6,7 +6,6 @@ from pandas._config import config -from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import EmptyDataError from pandas.util._decorators import Appender @@ -15,11 +14,12 @@ from pandas.core.frame import DataFrame from pandas.io.common import ( + _NA_VALUES, + _is_url, + _stringify_path, + _validate_header_arg, get_filepath_or_buffer, - is_url, - stringify_path, urlopen, - validate_header_arg, ) from pandas.io.excel._util import ( _fill_mi_header, @@ -124,7 +124,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. @@ -339,7 +339,7 @@ def read_excel( class _BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer): # If filepath_or_buffer is a url, load the data into a BytesIO - if is_url(filepath_or_buffer): + if _is_url(filepath_or_buffer): filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) @@ -408,7 +408,7 @@ def parse( **kwds, ): - validate_header_arg(header) + _validate_header_arg(header) ret_dict = False @@ -708,7 +708,7 @@ def __init__( self.mode = mode def __fspath__(self): - return stringify_path(self.path) + return _stringify_path(self.path) def _get_sheet_name(self, sheet_name): if sheet_name is None: @@ -808,7 +808,7 @@ def __init__(self, io, engine=None): # could be a str, ExcelFile, Book, etc. 
self.io = io # Always a string - self._io = stringify_path(io) + self._io = _stringify_path(io) self._reader = self._engines[engine](self._io) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index eb05004d9137c..01118d7b7cd3e 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -4,7 +4,7 @@ from pandas import DataFrame, Int64Index, RangeIndex -from pandas.io.common import stringify_path +from pandas.io.common import _stringify_path def to_feather(df: DataFrame, path): @@ -20,7 +20,7 @@ def to_feather(df: DataFrame, path): import_optional_dependency("pyarrow") from pyarrow import feather - path = stringify_path(path) + path = _stringify_path(path) if not isinstance(df, DataFrame): raise ValueError("feather only support IO with DataFrames") @@ -34,11 +34,10 @@ def to_feather(df: DataFrame, path): # raise on anything else as we don't serialize the index if not isinstance(df.index, Int64Index): - typ = type(df.index) raise ValueError( - f"feather does not support serializing {typ} " + "feather does not support serializing {} " "for the index; you can .reset_index() " - "to make the index into column(s)" + "to make the index into column(s)".format(type(df.index)) ) if not df.index.equals(RangeIndex.from_range(range(len(df)))): @@ -64,7 +63,7 @@ def to_feather(df: DataFrame, path): feather.write_feather(df, path) -def read_feather(path, columns=None, use_threads: bool = True): +def read_feather(path, columns=None, use_threads=True): """ Load a feather-format object from the file path. @@ -98,6 +97,6 @@ def read_feather(path, columns=None, use_threads: bool = True): import_optional_dependency("pyarrow") from pyarrow import feather - path = stringify_path(path) + path = _stringify_path(path) return feather.read_feather(path, columns=columns, use_threads=bool(use_threads)) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index bed29e1fd4792..1d4fa929b2138 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -6,8 +6,7 @@ def get_console_size(): - """ - Return console size as tuple = (width, height). + """Return console size as tuple = (width, height). Returns (None,None) in non-interactive session. """ @@ -51,13 +50,9 @@ def get_console_size(): def in_interactive_session(): - """ - Check if we're running in an interactive shell. + """ check if we're running in an interactive shell - Returns - ------- - bool - True if running under python/ipython interactive shell. + returns True if running under python/ipython interactive shell """ from pandas import get_option @@ -76,11 +71,7 @@ def check_main(): def in_ipython_frontend(): """ - Check if we're inside an an IPython zmq frontend. 
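# Illustrative example (not part of the patch; requires pyarrow installed):
# to_feather above only accepts a default RangeIndex, so reset_index() first
# when the frame carries a meaningful index.
import pandas as pd

df = pd.DataFrame({"a": [1, 2]}, index=["x", "y"])
try:
    df.to_feather("frame.feather")
except ValueError as err:
    print(err)                                  # non-default index is rejected
df.reset_index().to_feather("frame.feather")    # works with the default RangeIndex
print(pd.read_feather("frame.feather"))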
- - Returns - ------- - bool + check if we're inside an an IPython zmq frontend """ try: ip = get_ipython() # noqa diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 3a91d65ab4562..ae5d1d30bcddb 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -5,7 +5,7 @@ import csv as csvlib from io import StringIO import os -from typing import List +from typing import Any, Dict, List import warnings from zipfile import ZipFile @@ -22,10 +22,11 @@ from pandas.core.dtypes.missing import notna from pandas.io.common import ( - get_compression_method, + UnicodeWriter, + _get_compression_method, + _get_handle, + _infer_compression, get_filepath_or_buffer, - get_handle, - infer_compression, ) @@ -60,7 +61,7 @@ def __init__( path_or_buf = StringIO() # Extract compression mode as given, if dict - compression, self.compression_args = get_compression_method(compression) + compression, self.compression_args = _get_compression_method(compression) self.path_or_buf, _, _, _ = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, mode=mode @@ -77,7 +78,7 @@ def __init__( if encoding is None: encoding = "utf-8" self.encoding = encoding - self.compression = infer_compression(self.path_or_buf, compression) + self.compression = _infer_compression(self.path_or_buf, compression) if quoting is None: quoting = csvlib.QUOTE_MINIMAL @@ -178,7 +179,7 @@ def save(self): f = self.path_or_buf close = False else: - f, handles = get_handle( + f, handles = _get_handle( self.path_or_buf, self.mode, encoding=self.encoding, @@ -187,9 +188,7 @@ def save(self): close = True try: - # Note: self.encoding is irrelevant here - self.writer = csvlib.writer( - f, + writer_kwargs: Dict[str, Any] = dict( lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -197,6 +196,10 @@ def save(self): escapechar=self.escapechar, quotechar=self.quotechar, ) + if self.encoding == "ascii": + self.writer = csvlib.writer(f, **writer_kwargs) + else: + self.writer = UnicodeWriter(f, encoding=self.encoding, **writer_kwargs) self._save() @@ -209,7 +212,7 @@ def save(self): else: compression = dict(self.compression_args, method=self.compression) - f, handles = get_handle( + f, handles = _get_handle( self.path_or_buf, self.mode, encoding=self.encoding, diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 18340bc702378..2f7a80eea1554 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -15,7 +15,6 @@ from pandas import Index import pandas.core.common as com -from pandas.io.common import stringify_path from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.format import get_level_lengths from pandas.io.formats.printing import pprint_thing @@ -712,6 +711,7 @@ def write( and ``io.excel.xlsm.writer``. 
""" from pandas.io.excel import ExcelWriter + from pandas.io.common import _stringify_path num_rows, num_cols = self.df.shape if num_rows > self.max_rows or num_cols > self.max_cols: @@ -724,7 +724,7 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - writer = ExcelWriter(stringify_path(writer), engine=engine) + writer = ExcelWriter(_stringify_path(writer), engine=engine) need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b0574925cf1b1..109df6584641d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -72,7 +72,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.io.common import stringify_path +from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing if TYPE_CHECKING: @@ -482,7 +482,7 @@ def get_buffer( objects, otherwise yield buf unchanged. """ if buf is not None: - buf = stringify_path(buf) + buf = _stringify_path(buf) else: buf = StringIO() @@ -1640,7 +1640,7 @@ def _get_format_datetime64_from_values( """ given values and a date_format, return a string format """ if isinstance(values, np.ndarray) and values.ndim > 1: - # We don't actually care about the order of values, and DatetimeIndex + # We don't actaully care about the order of values, and DatetimeIndex # only accepts 1D values values = values.ravel() diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 3a3347a5c86ea..0c6b0c1a5810b 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -12,7 +12,7 @@ from pandas import option_context -from pandas.io.common import is_url +from pandas.io.common import _is_url from pandas.io.formats.format import ( DataFrameFormatter, TableFormatter, @@ -147,7 +147,7 @@ def _write_cell( rs = pprint_thing(s, escape_chars=esc).strip() - if self.render_links and is_url(rs): + if self.render_links and _is_url(rs): rs_unescaped = pprint_thing(s, escape_chars={}).strip() start_tag += ''.format(url=rs_unescaped) end_a = "" diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index d9711f4f4626a..8a4a72021eb43 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -65,7 +65,7 @@ def read_gbq( *New in version 0.2.0 of pandas-gbq*. dialect : str, default 'legacy' - Note: The default value is changing to 'standard' in a future version. + Note: The default value is changing to 'standard' in a future verion. SQL syntax dialect to use. 
Value can be one of: diff --git a/pandas/io/html.py b/pandas/io/html.py index eafcca0e85bb3..3521bad375aa6 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -16,7 +16,7 @@ from pandas.core.construction import create_series_with_explicit_dtype -from pandas.io.common import is_url, urlopen, validate_header_arg +from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -117,7 +117,7 @@ def _read(obj): ------- raw_text : str """ - if is_url(obj): + if _is_url(obj): with urlopen(obj) as url: text = url.read() elif hasattr(obj, "read"): @@ -705,7 +705,7 @@ def _build_doc(self): parser = HTMLParser(recover=True, encoding=self.encoding) try: - if is_url(self.io): + if _is_url(self.io): with urlopen(self.io) as f: r = parse(f, parser=parser) else: @@ -717,7 +717,7 @@ def _build_doc(self): pass except (UnicodeDecodeError, IOError) as e: # if the input is a blob of html goop - if not is_url(self.io): + if not _is_url(self.io): r = fromstring(self.io, parser=parser) try: @@ -1076,7 +1076,7 @@ def read_html( "cannot skip rows starting from the end of the " "data (you passed a negative value)" ) - validate_header_arg(header) + _validate_header_arg(header) return _parse( flavor=flavor, io=io, diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 93b28f8a0e285..f73a314d4da29 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -20,10 +20,10 @@ from pandas.io.common import ( BaseIterator, + _get_handle, + _infer_compression, + _stringify_path, get_filepath_or_buffer, - get_handle, - infer_compression, - stringify_path, ) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer @@ -59,7 +59,7 @@ def to_json( "'index=False' is only valid when 'orient' is " "'split' or 'table'" ) - path_or_buf = stringify_path(path_or_buf) + path_or_buf = _stringify_path(path_or_buf) if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -92,7 +92,7 @@ def to_json( s = convert_to_line_delimits(s) if isinstance(path_or_buf, str): - fh, handles = get_handle(path_or_buf, "w", compression=compression) + fh, handles = _get_handle(path_or_buf, "w", compression=compression) try: fh.write(s) finally: @@ -315,7 +315,7 @@ def __init__( timedeltas = obj.select_dtypes(include=["timedelta"]).columns if len(timedeltas): obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) - # Convert PeriodIndex to datetimes before serializing + # Convert PeriodIndex to datetimes before serialzing if is_period_dtype(obj.index): obj.index = obj.index.to_timestamp() @@ -588,7 +588,7 @@ def read_json( if encoding is None: encoding = "utf-8" - compression = infer_compression(path_or_buf, compression) + compression = _infer_compression(path_or_buf, compression) filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression ) @@ -708,7 +708,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): pass if exists or self.compression is not None: - data, _ = get_handle( + data, _ = _get_handle( filepath_or_buffer, "r", encoding=self.encoding, diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index f68347f042086..54e44ff33d079 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -46,7 +46,7 @@ def get_engine(engine: str) -> "BaseImpl": class BaseImpl: @staticmethod - def validate_dataframe(df: DataFrame): + def 
validate_dataframe(df): if not isinstance(df, DataFrame): raise ValueError("to_parquet only supports IO with DataFrames") @@ -62,7 +62,7 @@ def validate_dataframe(df: DataFrame): if not valid_names: raise ValueError("Index level names must be strings") - def write(self, df: DataFrame, path, compression, **kwargs): + def write(self, df, path, compression, **kwargs): raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs): @@ -80,7 +80,7 @@ def __init__(self): def write( self, - df: DataFrame, + df, path, compression="snappy", coerce_timestamps="ms", @@ -137,13 +137,7 @@ def __init__(self): self.api = fastparquet def write( - self, - df: DataFrame, - path, - compression="snappy", - index=None, - partition_cols=None, - **kwargs, + self, df, path, compression="snappy", index=None, partition_cols=None, **kwargs ): self.validate_dataframe(df) # thriftpy/protocol/compact.py:339: @@ -202,9 +196,9 @@ def read(self, path, columns=None, **kwargs): def to_parquet( - df: DataFrame, + df, path, - engine: str = "auto", + engine="auto", compression="snappy", index: Optional[bool] = None, partition_cols=None, @@ -215,7 +209,6 @@ def to_parquet( Parameters ---------- - df : DataFrame path : str File path or Root Directory path. Will be used as Root Directory path while writing a partitioned dataset. @@ -262,7 +255,7 @@ def to_parquet( ) -def read_parquet(path, engine: str = "auto", columns=None, **kwargs): +def read_parquet(path, engine="auto", columns=None, **kwargs): """ Load a parquet object from the file path, returning a DataFrame. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 37cd36a2be3bc..c87edcc602686 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,7 +17,6 @@ import pandas._libs.lib as lib import pandas._libs.ops as libops import pandas._libs.parsers as parsers -from pandas._libs.parsers import STR_NA_VALUES from pandas._libs.tslibs import parsing from pandas.errors import ( AbstractMethodError, @@ -35,7 +34,6 @@ is_categorical_dtype, is_dtype_equal, is_extension_array_dtype, - is_file_like, is_float, is_integer, is_integer_dtype, @@ -62,12 +60,15 @@ from pandas.core.tools import datetimes as tools from pandas.io.common import ( + _NA_VALUES, BaseIterator, + UnicodeReader, UTF8Recoder, + _get_handle, + _infer_compression, + _validate_header_arg, get_filepath_or_buffer, - get_handle, - infer_compression, - validate_header_arg, + is_file_like, ) from pandas.io.date_converters import generic_parser @@ -194,7 +195,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. 
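# Illustrative example (not part of the patch): the NA handling documented
# above -- na_values extends the default NA set unless keep_default_na=False.
import io
import pandas as pd

data = "a,b\nNA,foo\nmissing,bar"
print(pd.read_csv(io.StringIO(data), na_values=["missing"]))
# both "NA" and "missing" parse as NaN
print(pd.read_csv(io.StringIO(data), na_values=["missing"], keep_default_na=False))
# only "missing" parses as NaN; "NA" is kept as a literal string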
@@ -425,7 +426,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): kwds["encoding"] = encoding compression = kwds.get("compression", "infer") - compression = infer_compression(filepath_or_buffer, compression) + compression = _infer_compression(filepath_or_buffer, compression) # TODO: get_filepath_or_buffer could return # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] @@ -1049,7 +1050,7 @@ def _clean_options(self, options, engine): na_values = options["na_values"] skiprows = options["skiprows"] - validate_header_arg(options["header"]) + _validate_header_arg(options["header"]) depr_warning = "" @@ -2282,7 +2283,7 @@ def __init__(self, f, **kwds): self.comment = kwds["comment"] self._comment_lines = [] - f, handles = get_handle( + f, handles = _get_handle( f, "r", encoding=self.encoding, @@ -2430,13 +2431,23 @@ class MyDialect(csv.Dialect): self.line_pos += 1 sniffed = csv.Sniffer().sniff(line) dia.delimiter = sniffed.delimiter + if self.encoding is not None: + self.buf.extend( + list( + UnicodeReader( + StringIO(line), dialect=dia, encoding=self.encoding + ) + ) + ) + else: + self.buf.extend(list(csv.reader(StringIO(line), dialect=dia))) - # Note: self.encoding is irrelevant here - line_rdr = csv.reader(StringIO(line), dialect=dia) - self.buf.extend(list(line_rdr)) - - # Note: self.encoding is irrelevant here - reader = csv.reader(f, dialect=dia, strict=True) + if self.encoding is not None: + reader = UnicodeReader( + f, dialect=dia, encoding=self.encoding, strict=True + ) + else: + reader = csv.reader(f, dialect=dia, strict=True) else: @@ -3387,7 +3398,7 @@ def _clean_na_values(na_values, keep_default_na=True): if na_values is None: if keep_default_na: - na_values = STR_NA_VALUES + na_values = _NA_VALUES else: na_values = set() na_fvalues = set() @@ -3404,7 +3415,7 @@ def _clean_na_values(na_values, keep_default_na=True): v = [v] if keep_default_na: - v = set(v) | STR_NA_VALUES + v = set(v) | _NA_VALUES na_values[k] = v na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} @@ -3413,7 +3424,7 @@ def _clean_na_values(na_values, keep_default_na=True): na_values = [na_values] na_values = _stringify_na_values(na_values) if keep_default_na: - na_values = na_values | STR_NA_VALUES + na_values = na_values | _NA_VALUES na_fvalues = _floatify_na_values(na_values) @@ -3481,7 +3492,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): # 2) index_names (column names) # # Both must be non-null to ensure a successful construction. Otherwise, - # we have to create a generic empty Index. + # we have to create a generic emtpy Index. 
if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: @@ -3564,7 +3575,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na): return na_values[col], na_fvalues[col] else: if keep_default_na: - return STR_NA_VALUES, set() + return _NA_VALUES, set() return set(), set() else: diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6ce52da21b4e8..0a0ccedd78f00 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -4,7 +4,7 @@ from pandas.compat import pickle_compat as pc -from pandas.io.common import get_handle, stringify_path +from pandas.io.common import _get_handle, _stringify_path def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL): @@ -63,8 +63,8 @@ def to_pickle(obj, path, compression="infer", protocol=pickle.HIGHEST_PROTOCOL): >>> import os >>> os.remove("./dummy.pkl") """ - path = stringify_path(path) - f, fh = get_handle(path, "wb", compression=compression, is_text=False) + path = _stringify_path(path) + f, fh = _get_handle(path, "wb", compression=compression, is_text=False) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL try: @@ -134,8 +134,8 @@ def read_pickle(path, compression="infer"): >>> import os >>> os.remove("./dummy.pkl") """ - path = stringify_path(path) - f, fh = get_handle(path, "rb", compression=compression, is_text=False) + path = _stringify_path(path) + f, fh = _get_handle(path, "rb", compression=compression, is_text=False) # 1) try standard library Pickle # 2) try pickle_compat (older pandas version) to handle subclass changes diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8e0ab27c1fa85..07bf30e51a763 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -63,7 +63,7 @@ from pandas.core.computation.pytables import PyTablesExpr, maybe_expression from pandas.core.indexes.api import ensure_index -from pandas.io.common import stringify_path +from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: @@ -176,6 +176,22 @@ class DuplicateWarning(Warning): # formats _FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"} +# storer class map +_STORER_MAP = { + "series": "SeriesFixed", + "frame": "FrameFixed", +} + +# table class map +_TABLE_MAP = { + "generic_table": "GenericTable", + "appendable_series": "AppendableSeriesTable", + "appendable_multiseries": "AppendableMultiSeriesTable", + "appendable_frame": "AppendableFrameTable", + "appendable_multiframe": "AppendableMultiFrameTable", + "worm": "WORMTable", +} + # axes map _AXES_MAP = {DataFrame: [0]} @@ -274,7 +290,7 @@ def to_hdf( encoding=encoding, ) - path_or_buf = stringify_path(path_or_buf) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): with HDFStore( path_or_buf, mode=mode, complevel=complevel, complib=complib @@ -379,7 +395,7 @@ def read_hdf( store = path_or_buf auto_close = False else: - path_or_buf = stringify_path(path_or_buf) + path_or_buf = _stringify_path(path_or_buf) if not isinstance(path_or_buf, str): raise NotImplementedError( "Support for generic buffers has not been implemented." 
@@ -525,7 +541,7 @@ def __init__( if complib is None and complevel is not None: complib = tables.filters.default_complib - self._path = stringify_path(path) + self._path = _stringify_path(path) if mode is None: mode = "a" self._mode = mode @@ -1537,17 +1553,12 @@ def _create_storer( self, group, format=None, - value: Optional[FrameOrSeries] = None, + value=None, encoding: str = "UTF-8", errors: str = "strict", ) -> Union["GenericFixed", "Table"]: """ return a suitable class to operate """ - cls: Union[Type["GenericFixed"], Type["Table"]] - - if value is not None and not isinstance(value, (Series, DataFrame)): - raise TypeError("value must be None, Series, or DataFrame") - def error(t): # return instead of raising so mypy can tell where we are raising return TypeError( @@ -1576,7 +1587,10 @@ def error(t): ) else: _TYPE_MAP = {Series: "series", DataFrame: "frame"} - pt = _TYPE_MAP[type(value)] + try: + pt = _TYPE_MAP[type(value)] + except KeyError: + raise error("_TYPE_MAP") # we are actually a table if format == "table": @@ -1584,12 +1598,12 @@ def error(t): # a storer node if "table" not in pt: - _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed} try: - cls = _STORER_MAP[pt] + return globals()[_STORER_MAP[pt]]( + self, group, encoding=encoding, errors=errors + ) except KeyError: raise error("_STORER_MAP") - return cls(self, group, encoding=encoding, errors=errors) # existing node (and must be a table) if tt is None: @@ -1611,22 +1625,29 @@ def error(t): tt = "appendable_frame" elif index.nlevels > 1: tt = "appendable_multiframe" + elif pt == "wide_table": + tt = "appendable_panel" + elif pt == "ndim_table": + tt = "appendable_ndim" + + else: + + # distinguish between a frame/table + tt = "legacy_panel" + try: + fields = group.table._v_attrs.fields + if len(fields) == 1 and fields[0] == "value": + tt = "legacy_frame" + except IndexError: + pass - _TABLE_MAP = { - "generic_table": GenericTable, - "appendable_series": AppendableSeriesTable, - "appendable_multiseries": AppendableMultiSeriesTable, - "appendable_frame": AppendableFrameTable, - "appendable_multiframe": AppendableMultiFrameTable, - "worm": WORMTable, - } try: - cls = _TABLE_MAP[tt] + return globals()[_TABLE_MAP[tt]]( + self, group, encoding=encoding, errors=errors + ) except KeyError: raise error("_TABLE_MAP") - return cls(self, group, encoding=encoding, errors=errors) - def _write_to_group( self, key: str, @@ -2012,6 +2033,9 @@ def maybe_set_size(self, min_itemsize=None): if min_itemsize is not None and self.typ.itemsize < min_itemsize: self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos) + def validate(self, handler, append): + self.validate_names() + def validate_names(self): pass @@ -3152,6 +3176,7 @@ def __init__( self.non_index_axes = [] self.values_axes = [] self.data_columns = [] + self.metadata = [] self.info = dict() self.nan_rep = None @@ -3205,13 +3230,13 @@ def validate(self, other): oax = ov[i] if sax != oax: raise ValueError( - f"invalid combination of [{c}] on appending data " + f"invalid combinate of [{c}] on appending data " f"[{sax}] vs current table [{oax}]" ) # should never get here raise Exception( - f"invalid combination of [{c}] on appending data [{sv}] vs " + f"invalid combinate of [{c}] on appending data [{sv}] vs " f"current table [{ov}]" ) @@ -3351,6 +3376,7 @@ def set_attrs(self): self.attrs.encoding = self.encoding self.attrs.errors = self.errors self.attrs.levels = self.levels + self.attrs.metadata = self.metadata self.attrs.info = self.info def get_attrs(self): @@ -3364,6 
+3390,7 @@ def get_attrs(self): self.levels = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + self.metadata = getattr(self.attrs, "metadata", None) or [] def validate_version(self, where=None): """ are we trying to operate on an old version? """ @@ -3582,8 +3609,7 @@ def _read_axes( return results - @classmethod - def get_object(cls, obj, transposed: bool): + def get_object(self, obj, transposed: bool): """ return the data for this obj """ return obj @@ -3614,7 +3640,6 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): if isinstance(min_itemsize, dict): existing_data_columns = set(data_columns) - data_columns = list(data_columns) # ensure we do not modify data_columns.extend( [ k @@ -3626,10 +3651,10 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): # return valid columns in the order of our axis return [c for c in data_columns if c in axis_labels] - def _create_axes( + def create_axes( self, axes, - obj: DataFrame, + obj, validate: bool = True, nan_rep=None, data_columns=None, @@ -3654,16 +3679,16 @@ def _create_axes( """ - if not isinstance(obj, DataFrame): - group = self.group._v_name - raise TypeError( - f"cannot properly create the storer for: [group->{group}," - f"value->{type(obj)}]" - ) - # set the default axes if needed if axes is None: - axes = [0] + try: + axes = _AXES_MAP[type(obj)] + except KeyError: + group = self.group._v_name + raise TypeError( + f"cannot properly create the storer for: [group->{group}," + f"value->{type(obj)}]" + ) # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -3671,14 +3696,15 @@ def _create_axes( # do we have an existing table (if so, use its axes & data_columns) if self.infer_axes(): existing_table = self.copy() - axes = [a.axis for a in self.index_axes] - data_columns = self.data_columns - nan_rep = self.nan_rep - new_info = self.info - # TODO: do we always have validate=True here? 
+ existing_table.infer_axes() + axes = [a.axis for a in existing_table.index_axes] + data_columns = existing_table.data_columns + nan_rep = existing_table.nan_rep + self.encoding = existing_table.encoding + self.errors = existing_table.errors + self.info = copy.copy(existing_table.info) else: existing_table = None - new_info = self.info assert self.ndim == 2 # with next check, we must have len(axes) == 1 # currently support on ndim-1 axes @@ -3694,7 +3720,7 @@ def _create_axes( if nan_rep is None: nan_rep = "nan" - # We construct the non-index-axis first, since that alters new_info + # We construct the non-index-axis first, since that alters self.info idx = [x for x in [0, 1] if x not in axes][0] a = obj.axes[idx] @@ -3712,7 +3738,7 @@ def _create_axes( append_axis = exist_axis # the non_index_axes info - info = new_info.setdefault(idx, {}) + info = self.info.setdefault(idx, {}) info["names"] = list(a.names) info["type"] = type(a).__name__ @@ -3721,14 +3747,14 @@ def _create_axes( # Now we can construct our new index axis idx = axes[0] a = obj.axes[idx] - index_name = obj._AXIS_NAMES[idx] - new_index = _convert_index(index_name, a, self.encoding, self.errors) + name = obj._AXIS_NAMES[idx] + new_index = _convert_index(name, a, self.encoding, self.errors) new_index.axis = idx # Because we are always 2D, there is only one new_index, so # we know it will have pos=0 new_index.set_pos(0) - new_index.update_info(new_info) + new_index.update_info(self.info) new_index.maybe_set_size(min_itemsize) # check for column conflicts new_index_axes = [new_index] @@ -3746,13 +3772,47 @@ def get_blk_items(mgr, blocks): transposed = new_index.axis == 1 # figure out data_columns and get out blocks + block_obj = self.get_object(obj, transposed)._consolidate() + blocks = block_obj._data.blocks + blk_items = get_blk_items(block_obj._data, blocks) + data_columns = self.validate_data_columns( data_columns, min_itemsize, new_non_index_axes ) - block_obj = self.get_object(obj, transposed)._consolidate() - blocks, blk_items = self._get_blocks_and_items( - block_obj, existing_table, new_non_index_axes, data_columns - ) + if len(data_columns): + axis, axis_labels = new_non_index_axes[0] + new_labels = Index(axis_labels).difference(Index(data_columns)) + mgr = block_obj.reindex(new_labels, axis=axis)._data + + blocks = list(mgr.blocks) + blk_items = get_blk_items(mgr, blocks) + for c in data_columns: + mgr = block_obj.reindex([c], axis=axis)._data + blocks.extend(mgr.blocks) + blk_items.extend(get_blk_items(mgr, mgr.blocks)) + + # reorder the blocks in the same order as the existing_table if we can + if existing_table is not None: + by_items = { + tuple(b_items.tolist()): (b, b_items) + for b, b_items in zip(blocks, blk_items) + } + new_blocks = [] + new_blk_items = [] + for ea in existing_table.values_axes: + items = tuple(ea.values) + try: + b, b_items = by_items.pop(items) + new_blocks.append(b) + new_blk_items.append(b_items) + except (IndexError, KeyError): + jitems = ",".join(pprint_thing(item) for item in items) + raise ValueError( + f"cannot match existing table structure for [{jitems}] " + "on appending data" + ) + blocks = new_blocks + blk_items = new_blk_items # add my values vaxes = [] @@ -3821,7 +3881,7 @@ def get_blk_items(mgr, blocks): dtype=dtype_name, data=data, ) - col.update_info(new_info) + col.update_info(self.info) vaxes.append(col) @@ -3836,59 +3896,13 @@ def get_blk_items(mgr, blocks): # validate our min_itemsize self.validate_min_itemsize(min_itemsize) + # validate our metadata + self.metadata = 
[c.name for c in self.values_axes if c.metadata is not None] + # validate the axes if we have an existing table if validate: self.validate(existing_table) - @staticmethod - def _get_blocks_and_items( - block_obj, existing_table, new_non_index_axes, data_columns - ): - # Helper to clarify non-state-altering parts of _create_axes - - def get_blk_items(mgr, blocks): - return [mgr.items.take(blk.mgr_locs) for blk in blocks] - - blocks = block_obj._data.blocks - blk_items = get_blk_items(block_obj._data, blocks) - - if len(data_columns): - axis, axis_labels = new_non_index_axes[0] - new_labels = Index(axis_labels).difference(Index(data_columns)) - mgr = block_obj.reindex(new_labels, axis=axis)._data - - blocks = list(mgr.blocks) - blk_items = get_blk_items(mgr, blocks) - for c in data_columns: - mgr = block_obj.reindex([c], axis=axis)._data - blocks.extend(mgr.blocks) - blk_items.extend(get_blk_items(mgr, mgr.blocks)) - - # reorder the blocks in the same order as the existing_table if we can - if existing_table is not None: - by_items = { - tuple(b_items.tolist()): (b, b_items) - for b, b_items in zip(blocks, blk_items) - } - new_blocks = [] - new_blk_items = [] - for ea in existing_table.values_axes: - items = tuple(ea.values) - try: - b, b_items = by_items.pop(items) - new_blocks.append(b) - new_blk_items.append(b_items) - except (IndexError, KeyError): - jitems = ",".join(pprint_thing(item) for item in items) - raise ValueError( - f"cannot match existing table structure for [{jitems}] " - "on appending data" - ) - blocks = new_blocks - blk_items = new_blk_items - - return blocks, blk_items - def process_axes(self, obj, selection: "Selection", columns=None): """ process axes filters """ @@ -4103,7 +4117,7 @@ def write( self._handle.remove_node(self.group, "table") # create the axes - self._create_axes( + self.create_axes( axes=axes, obj=obj, validate=append, @@ -4113,7 +4127,7 @@ def write( ) for a in self.axes: - a.validate_names() + a.validate(self, append) if not self.is_exists: @@ -4322,8 +4336,7 @@ class AppendableFrameTable(AppendableTable): def is_transposed(self) -> bool: return self.index_axes[0].axis == 1 - @classmethod - def get_object(cls, obj, transposed: bool): + def get_object(self, obj, transposed: bool): """ these are written transposed """ if transposed: obj = obj.T @@ -4422,8 +4435,7 @@ class AppendableSeriesTable(AppendableFrameTable): def is_transposed(self) -> bool: return False - @classmethod - def get_object(cls, obj, transposed: bool): + def get_object(self, obj, transposed: bool): return obj def write(self, obj, data_columns=None, **kwargs): diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 56ebb583bc2f9..6bd3532d538c7 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -1,7 +1,7 @@ """ Read SAS sas7bdat or xport files. 
""" -from pandas.io.common import stringify_path +from pandas.io.common import _stringify_path def read_sas( @@ -52,7 +52,7 @@ def read_sas( "than a string name, you must specify " "a format string" ) - filepath_or_buffer = stringify_path(filepath_or_buffer) + filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, str): raise ValueError(buffer_error_msg) fname = filepath_or_buffer.lower() diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 84dd302fc293f..dbe64e4c0f06d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -44,7 +44,7 @@ from pandas.core.frame import DataFrame from pandas.core.series import Series -from pandas.io.common import BaseIterator, get_filepath_or_buffer, stringify_path +from pandas.io.common import BaseIterator, _stringify_path, get_filepath_or_buffer _version_error = ( "Version of given Stata file is not 104, 105, 108, " @@ -1051,7 +1051,7 @@ def __init__( self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) - path_or_buf = stringify_path(path_or_buf) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, str): path_or_buf, encoding, _, should_close = get_filepath_or_buffer(path_or_buf) @@ -2112,7 +2112,7 @@ def __init__( if byteorder is None: byteorder = sys.byteorder self._byteorder = _set_endianness(byteorder) - self._fname = stringify_path(fname) + self._fname = _stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} self._converted_names = {} diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index da2ef5260d070..bf11b81af6f90 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -10,15 +10,7 @@ ) from pandas.core.dtypes.common import is_dtype_equal -from pandas import ( - Categorical, - Interval, - Period, - Series, - Timedelta, - Timestamp, - date_range, -) +from pandas import Categorical, Period, Series, Timedelta, Timestamp, date_range import pandas.util.testing as tm @@ -115,25 +107,6 @@ def test_infer_from_scalar_tz(tz, pandas_dtype): assert val == exp_val -@pytest.mark.parametrize( - "left, right, subtype", - [ - (0, 1, "int64"), - (0.0, 1.0, "float64"), - (Timestamp(0), Timestamp(1), "datetime64[ns]"), - (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"), - (Timedelta(0), Timedelta(1), "timedelta64[ns]"), - ], -) -def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): - # GH 30337 - interval = Interval(left, right, closed) - result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) - expected_dtype = f"interval[{subtype}]" if pandas_dtype else np.object_ - assert result_dtype == expected_dtype - assert result_value == interval - - def test_infer_dtype_from_scalar_errors(): msg = "invalid ndarray passed to infer_dtype_from_scalar" diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 014581682ac59..46ca7bd8f760a 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -183,7 +183,7 @@ def _values_for_factorize(self): def _values_for_argsort(self): # Disable NumPy's shape inference by including an empty tuple... - # If all the elements of self are the same size P, NumPy will + # If all the elemnts of self are the same size P, NumPy will # cast them to an (N, P) array, instead of an (N,) array of tuples. 
frozen = [()] + [tuple(x.items()) for x in self] return np.array(frozen, dtype=object)[1:] diff --git a/pandas/tests/frame/methods/__init__.py b/pandas/tests/frame/methods/__init__.py deleted file mode 100644 index 245594bfdc9e7..0000000000000 --- a/pandas/tests/frame/methods/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Test files dedicated to individual (stand-alone) DataFrame methods - -Ideally these files/tests should correspond 1-to-1 with tests.series.methods - -These may also present opportunities for sharing/de-duplicating test code. -""" diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0653c9dc5f91b..cef389a6c4167 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1495,6 +1495,112 @@ def test_sum_bools(self): bools = isna(df) assert bools.sum(axis=1)[0] == 10 + # --------------------------------------------------------------------- + # Cumulative Reductions - cumsum, cummax, ... + + def test_cumsum_corner(self): + dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) + # ?(wesm) + result = dm.cumsum() # noqa + + def test_cumsum(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumsum = datetime_frame.cumsum() + expected = datetime_frame.apply(Series.cumsum) + tm.assert_frame_equal(cumsum, expected) + + # axis = 1 + cumsum = datetime_frame.cumsum(axis=1) + expected = datetime_frame.apply(Series.cumsum, axis=1) + tm.assert_frame_equal(cumsum, expected) + + # works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + result = df.cumsum() # noqa + + # fix issue + cumsum_xs = datetime_frame.cumsum(axis=1) + assert np.shape(cumsum_xs) == np.shape(datetime_frame) + + def test_cumprod(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cumprod = datetime_frame.cumprod() + expected = datetime_frame.apply(Series.cumprod) + tm.assert_frame_equal(cumprod, expected) + + # axis = 1 + cumprod = datetime_frame.cumprod(axis=1) + expected = datetime_frame.apply(Series.cumprod, axis=1) + tm.assert_frame_equal(cumprod, expected) + + # fix issue + cumprod_xs = datetime_frame.cumprod(axis=1) + assert np.shape(cumprod_xs) == np.shape(datetime_frame) + + # ints + df = datetime_frame.fillna(0).astype(int) + df.cumprod(0) + df.cumprod(1) + + # ints32 + df = datetime_frame.fillna(0).astype(np.int32) + df.cumprod(0) + df.cumprod(1) + + def test_cummin(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummin = datetime_frame.cummin() + expected = datetime_frame.apply(Series.cummin) + tm.assert_frame_equal(cummin, expected) + + # axis = 1 + cummin = datetime_frame.cummin(axis=1) + expected = datetime_frame.apply(Series.cummin, axis=1) + tm.assert_frame_equal(cummin, expected) + + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + result = df.cummin() # noqa + + # fix issue + cummin_xs = datetime_frame.cummin(axis=1) + assert np.shape(cummin_xs) == np.shape(datetime_frame) + + def test_cummax(self, datetime_frame): + datetime_frame.loc[5:10, 0] = np.nan + datetime_frame.loc[10:15, 1] = np.nan + datetime_frame.loc[15:, 2] = np.nan + + # axis = 0 + cummax = datetime_frame.cummax() + expected = datetime_frame.apply(Series.cummax) + 
tm.assert_frame_equal(cummax, expected) + + # axis = 1 + cummax = datetime_frame.cummax(axis=1) + expected = datetime_frame.apply(Series.cummax, axis=1) + tm.assert_frame_equal(cummax, expected) + + # it works + df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) + result = df.cummax() # noqa + + # fix issue + cummax_xs = datetime_frame.cummax(axis=1) + assert np.shape(cummax_xs) == np.shape(datetime_frame) + # --------------------------------------------------------------------- # Miscellanea diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/test_asof.py similarity index 100% rename from pandas/tests/frame/methods/test_asof.py rename to pandas/tests/frame/test_asof.py diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index adec846802e66..ad6e0c963e730 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -25,7 +25,6 @@ date_range, isna, ) -from pandas.arrays import IntervalArray, PeriodArray from pandas.core.construction import create_series_with_explicit_dtype import pandas.util.testing as tm @@ -2397,21 +2396,6 @@ class List(list): result = DataFrame(List([List([1, 2, 3]), List([4, 5, 6])])) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "extension_arr", - [ - Categorical(list("aabbc")), - pd.SparseArray([1, np.nan, np.nan, np.nan]), - IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), - PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), - ], - ) - def test_constructor_with_extension_array(self, extension_arr): - # GH11363 - expected = DataFrame(Series(extension_arr)) - result = DataFrame(extension_arr) - tm.assert_frame_equal(result, expected) - class TestDataFrameConstructorWithDatetimeTZ: def test_from_dict(self): diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py deleted file mode 100644 index ad2cbff888b2e..0000000000000 --- a/pandas/tests/frame/test_cumulative.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Tests for DataFrame cumulative operations - -See also --------- -tests.series.test_cumulative -""" - -import numpy as np - -from pandas import DataFrame, Series -import pandas.util.testing as tm - - -class TestDataFrameCumulativeOps: - # --------------------------------------------------------------------- - # Cumulative Operations - cumsum, cummax, ... - - def test_cumsum_corner(self): - dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5)) - # TODO(wesm): do something with this? 
- result = dm.cumsum() # noqa - - def test_cumsum(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumsum = datetime_frame.cumsum() - expected = datetime_frame.apply(Series.cumsum) - tm.assert_frame_equal(cumsum, expected) - - # axis = 1 - cumsum = datetime_frame.cumsum(axis=1) - expected = datetime_frame.apply(Series.cumsum, axis=1) - tm.assert_frame_equal(cumsum, expected) - - # works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - df.cumsum() - - # fix issue - cumsum_xs = datetime_frame.cumsum(axis=1) - assert np.shape(cumsum_xs) == np.shape(datetime_frame) - - def test_cumprod(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cumprod = datetime_frame.cumprod() - expected = datetime_frame.apply(Series.cumprod) - tm.assert_frame_equal(cumprod, expected) - - # axis = 1 - cumprod = datetime_frame.cumprod(axis=1) - expected = datetime_frame.apply(Series.cumprod, axis=1) - tm.assert_frame_equal(cumprod, expected) - - # fix issue - cumprod_xs = datetime_frame.cumprod(axis=1) - assert np.shape(cumprod_xs) == np.shape(datetime_frame) - - # ints - df = datetime_frame.fillna(0).astype(int) - df.cumprod(0) - df.cumprod(1) - - # ints32 - df = datetime_frame.fillna(0).astype(np.int32) - df.cumprod(0) - df.cumprod(1) - - def test_cummin(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummin = datetime_frame.cummin() - expected = datetime_frame.apply(Series.cummin) - tm.assert_frame_equal(cummin, expected) - - # axis = 1 - cummin = datetime_frame.cummin(axis=1) - expected = datetime_frame.apply(Series.cummin, axis=1) - tm.assert_frame_equal(cummin, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - df.cummin() - - # fix issue - cummin_xs = datetime_frame.cummin(axis=1) - assert np.shape(cummin_xs) == np.shape(datetime_frame) - - def test_cummax(self, datetime_frame): - datetime_frame.loc[5:10, 0] = np.nan - datetime_frame.loc[10:15, 1] = np.nan - datetime_frame.loc[15:, 2] = np.nan - - # axis = 0 - cummax = datetime_frame.cummax() - expected = datetime_frame.apply(Series.cummax) - tm.assert_frame_equal(cummax, expected) - - # axis = 1 - cummax = datetime_frame.cummax(axis=1) - expected = datetime_frame.apply(Series.cummax, axis=1) - tm.assert_frame_equal(cummax, expected) - - # it works - df = DataFrame({"A": np.arange(20)}, index=np.arange(20)) - df.cummax() - - # fix issue - cummax_xs = datetime_frame.cummax(axis=1) - assert np.shape(cummax_xs) == np.shape(datetime_frame) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/test_explode.py similarity index 100% rename from pandas/tests/frame/methods/test_explode.py rename to pandas/tests/frame/test_explode.py diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/test_quantile.py similarity index 100% rename from pandas/tests/frame/methods/test_quantile.py rename to pandas/tests/frame/test_quantile.py diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/test_rank.py similarity index 100% rename from pandas/tests/frame/methods/test_rank.py rename to pandas/tests/frame/test_rank.py diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/test_replace.py similarity index 100% rename from 
pandas/tests/frame/methods/test_replace.py rename to pandas/tests/frame/test_replace.py diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 5c39dcc1a7659..ad058faff96e7 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -21,7 +21,7 @@ import pandas.core.common as com import pandas.util.testing as tm -from pandas.io.common import get_handle +from pandas.io.common import _get_handle MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] MIXED_INT_DTYPES = [ @@ -1065,7 +1065,7 @@ def test_to_csv_compression(self, df, encoding, compression): tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + f, _handles = _get_handle( filename, "w", compression=compression, encoding=encoding ) with f: diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 36cdaa8a6029b..f95137cd1bf88 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -798,7 +798,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011") rng2 = bdate_range("12/2/2011", "12/5/2011") - rng2._data.freq = BDay() # TODO: shouldn't this already be set? + rng2._data.freq = BDay() # TODO: shouldnt this already be set? result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) @@ -855,7 +855,7 @@ def test_daterange_bug_456(self): # GH #456 rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") - rng2._data.freq = CDay() # TODO: shouldn't this already be set? + rng2._data.freq = CDay() # TODO: shouldnt this already be set? result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 6e919571d1423..08c14c36a195e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1061,7 +1061,7 @@ class TestToDatetimeUnit: @pytest.mark.parametrize("cache", [True, False]) def test_unit(self, cache): # GH 11758 - # test proper behavior with errors + # test proper behavior with erros with pytest.raises(ValueError): to_datetime([1], unit="D", format="%Y%m%d", cache=cache) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 17ab85033acfb..d24f91a2c9e13 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -228,7 +228,7 @@ def test_insert(self): def test_delete(self): idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") - # preserve freq + # prserve freq expected_0 = timedelta_range(start="2 Days", periods=4, freq="D", name="idx") expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") @@ -257,7 +257,7 @@ def test_delete(self): def test_delete_slice(self): idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") - # preserve freq + # prserve freq expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 353d309a84823..f52c6b8858fd3 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -7,11 
+7,11 @@ import numpy as np import pytest -from pandas._libs.parsers import STR_NA_VALUES - from pandas import DataFrame, Index, MultiIndex import pandas.util.testing as tm +import pandas.io.common as com + def test_string_nas(all_parsers): parser = all_parsers @@ -99,7 +99,7 @@ def test_default_na_values(all_parsers): "#N/A N/A", "", } - assert _NA_VALUES == STR_NA_VALUES + assert _NA_VALUES == com._NA_VALUES parser = all_parsers nv = len(_NA_VALUES) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 3cd9d9cdd67d2..d9a76fe97f813 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -3,7 +3,6 @@ from distutils.version import LooseVersion from io import BytesIO import os -from pathlib import Path import re from warnings import catch_warnings, simplefilter @@ -4595,9 +4594,12 @@ def test_read_nokey_empty(self, setup_path): with pytest.raises(ValueError): read_hdf(path) + @td.skip_if_no("pathlib") def test_read_from_pathlib_path(self, setup_path): # GH11773 + from pathlib import Path + expected = DataFrame( np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 49af18d2935ef..e37561c865c7a 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -1,6 +1,5 @@ import io import os -from pathlib import Path import numpy as np import pytest @@ -69,7 +68,10 @@ def test_from_iterator(self): tm.assert_frame_equal(df, df0.iloc[2:5, :]) rdr.close() + @td.skip_if_no("pathlib") def test_path_pathlib(self): + from pathlib import Path + for j in 0, 1: df0 = self.data[j] for k in self.test_ix[j]: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index f4efbbeda6311..a15eac89ecedb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -4,7 +4,6 @@ from io import StringIO import mmap import os -from pathlib import Path import pytest @@ -28,7 +27,14 @@ def __fspath__(self): # Functions that consume a string path and return a string or path-like object -path_types = [str, CustomFSPath, Path] +path_types = [str, CustomFSPath] + +try: + from pathlib import Path + + path_types.append(Path) +except ImportError: + pass try: from py.path import local as LocalPath @@ -67,10 +73,11 @@ def test_expand_user_normal_path(self): assert expanded_name == filename assert os.path.expanduser(filename) == expanded_name + @td.skip_if_no("pathlib") def test_stringify_path_pathlib(self): - rel_path = icom.stringify_path(Path(".")) + rel_path = icom._stringify_path(Path(".")) assert rel_path == "." 
- redundant_path = icom.stringify_path(Path("foo//bar")) + redundant_path = icom._stringify_path(Path("foo//bar")) assert redundant_path == os.path.join("foo", "bar") @td.skip_if_no("py.path") @@ -78,11 +85,11 @@ def test_stringify_path_localpath(self): path = os.path.join("foo", "bar") abs_path = os.path.abspath(path) lpath = LocalPath(path) - assert icom.stringify_path(lpath) == abs_path + assert icom._stringify_path(lpath) == abs_path def test_stringify_path_fspath(self): p = CustomFSPath("foo/bar.csv") - result = icom.stringify_path(p) + result = icom._stringify_path(p) assert result == "foo/bar.csv" @pytest.mark.parametrize( @@ -92,7 +99,7 @@ def test_stringify_path_fspath(self): @pytest.mark.parametrize("path_type", path_types) def test_infer_compression_from_path(self, extension, expected, path_type): path = path_type("foo/bar.csv" + extension) - compression = icom.infer_compression(path, compression="infer") + compression = icom._infer_compression(path, compression="infer") assert compression == expected def test_get_filepath_or_buffer_with_path(self): @@ -313,18 +320,18 @@ def test_constructor_bad_file(self, mmap_file): err = mmap.error with pytest.raises(err, match=msg): - icom._MMapWrapper(non_file) + icom.MMapWrapper(non_file) target = open(mmap_file, "r") target.close() msg = "I/O operation on closed file" with pytest.raises(ValueError, match=msg): - icom._MMapWrapper(target) + icom.MMapWrapper(target) def test_get_attr(self, mmap_file): with open(mmap_file, "r") as target: - wrapper = icom._MMapWrapper(target) + wrapper = icom.MMapWrapper(target) attrs = dir(wrapper.mmap) attrs = [attr for attr in attrs if not attr.startswith("__")] @@ -337,7 +344,7 @@ def test_get_attr(self, mmap_file): def test_next(self, mmap_file): with open(mmap_file, "r") as target: - wrapper = icom._MMapWrapper(target) + wrapper = icom.MMapWrapper(target) lines = target.readlines() for line in lines: diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index e17a32cbc8b68..54eb2d78fb64f 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -44,14 +44,14 @@ def test_compression_size(obj, method, compression_only): @pytest.mark.parametrize("method", ["to_csv", "to_json"]) def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) + f, handles = icom._get_handle(path, "w", compression=compression_only) with f: getattr(obj, method)(f) assert not f.closed assert f.closed compressed_size = os.path.getsize(path) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=None) + f, handles = icom._get_handle(path, "w", compression=None) with f: getattr(obj, method)(f) assert not f.closed @@ -108,7 +108,7 @@ def test_compression_warning(compression_only): columns=["X", "Y", "Z"], ) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) + f, handles = icom._get_handle(path, "w", compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: df.to_csv(f, compression=compression_only) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 71a186dc2f3b0..5cea4fb5acca0 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -84,7 +84,7 @@ def test_matplotlib_formatters(self): units = pytest.importorskip("matplotlib.units") # 
Can't make any assertion about the start state. - # We we check that toggling converters off removes it, and toggling it + # We we check that toggling converters off remvoes it, and toggling it # on restores it. with cf.option_context("plotting.matplotlib.register_converters", True): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 25609cb852ed4..512a83ed304d1 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -201,17 +201,17 @@ class TestTimestampConstructors: def test_constructor(self): base_str = "2014-07-01 09:00" base_dt = datetime(2014, 7, 1, 9) - base_expected = 1_404_205_200_000_000_000 + base_expected = 1404205200000000000 # confirm base representation is correct - assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + assert calendar.timegm(base_dt.timetuple()) * 1000000000 == base_expected tests = [ (base_str, base_dt, base_expected), ( "2014-07-01 10:00", datetime(2014, 7, 1, 10), - base_expected + 3600 * 1_000_000_000, + base_expected + 3600 * 1000000000, ), ( "2014-07-01 09:00:00.000008000", @@ -250,7 +250,7 @@ def test_constructor(self): # with timezone for tz, offset in timezones: for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: - expected_tz = expected - offset * 3600 * 1_000_000_000 + expected_tz = expected - offset * 3600 * 1000000000 assert result.value == expected_tz assert conversion.pydt_to_i8(result) == expected_tz @@ -264,7 +264,7 @@ def test_constructor(self): result = Timestamp(result).tz_convert("UTC") else: result = Timestamp(result, tz="UTC") - expected_utc = expected - offset * 3600 * 1_000_000_000 + expected_utc = expected - offset * 3600 * 1000000000 assert result.value == expected_utc assert conversion.pydt_to_i8(result) == expected_utc @@ -272,14 +272,14 @@ def test_constructor_with_stringoffset(self): # GH 7833 base_str = "2014-07-01 11:00:00+02:00" base_dt = datetime(2014, 7, 1, 9) - base_expected = 1_404_205_200_000_000_000 + base_expected = 1404205200000000000 # confirm base representation is correct - assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + assert calendar.timegm(base_dt.timetuple()) * 1000000000 == base_expected tests = [ (base_str, base_expected), - ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1_000_000_000), + ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1000000000), ("2014-07-01 11:00:00.000008000+02:00", base_expected + 8000), ("2014-07-01 11:00:00.000000005+02:00", base_expected + 5), ] @@ -725,7 +725,7 @@ def test_utc_z_designator(self): assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is utc def test_asm8(self): - np.random.seed(7_960_929) + np.random.seed(7960929) ns = [Timestamp.min.value, Timestamp.max.value, 1000] for n in ns: @@ -786,7 +786,7 @@ def compare(x, y): ) def test_basics_nanos(self): - val = np.int64(946_684_800_000_000_000).view("M8[ns]") + val = np.int64(946684800000000000).view("M8[ns]") stamp = Timestamp(val.view("i8") + 500) assert stamp.year == 2000 assert stamp.month == 1 @@ -794,7 +794,7 @@ def test_basics_nanos(self): assert stamp.nanosecond == 500 # GH 14415 - val = np.iinfo(np.int64).min + 80_000_000_000_000 + val = np.iinfo(np.int64).min + 80000000000000 stamp = Timestamp(val) assert stamp.year == 1677 assert stamp.month == 9 @@ -807,8 +807,8 @@ def test_basics_nanos(self): [ [946688461000000000, {}], [946688461000000000 / 1000, dict(unit="us")], - [946688461000000000 / 
1_000_000, dict(unit="ms")], - [946688461000000000 / 1_000_000_000, dict(unit="s")], + [946688461000000000 / 1000000, dict(unit="ms")], + [946688461000000000 / 1000000000, dict(unit="s")], [10957, dict(unit="D", h=0)], [ (946688461000000000 + 500000) / 1000000000, @@ -852,24 +852,24 @@ def test_roundtrip(self): base = Timestamp("20140101 00:00:00") result = Timestamp(base.value + Timedelta("5ms").value) - assert result == Timestamp(f"{base}.005000") + assert result == Timestamp(str(base) + ".005000") assert result.microsecond == 5000 result = Timestamp(base.value + Timedelta("5us").value) - assert result == Timestamp(f"{base}.000005") + assert result == Timestamp(str(base) + ".000005") assert result.microsecond == 5 result = Timestamp(base.value + Timedelta("5ns").value) - assert result == Timestamp(f"{base}.000000005") + assert result == Timestamp(str(base) + ".000000005") assert result.nanosecond == 5 assert result.microsecond == 0 result = Timestamp(base.value + Timedelta("6ms 5us").value) - assert result == Timestamp(f"{base}.006005") + assert result == Timestamp(str(base) + ".006005") assert result.microsecond == 5 + 6 * 1000 result = Timestamp(base.value + Timedelta("200ms 5us").value) - assert result == Timestamp(f"{base}.200005") + assert result == Timestamp(str(base) + ".200005") assert result.microsecond == 5 + 200 * 1000 def test_hash_equivalent(self): @@ -890,12 +890,12 @@ def test_nanosecond_string_parsing(self): ts = Timestamp("2013-05-01 07:15:45.123456789") # GH 7878 expected_repr = "2013-05-01 07:15:45.123456789" - expected_value = 1_367_392_545_123_456_789 + expected_value = 1367392545123456789 assert ts.value == expected_value assert expected_repr in repr(ts) ts = Timestamp("2013-05-01 07:15:45.123456789+09:00", tz="Asia/Tokyo") - assert ts.value == expected_value - 9 * 3600 * 1_000_000_000 + assert ts.value == expected_value - 9 * 3600 * 1000000000 assert expected_repr in repr(ts) ts = Timestamp("2013-05-01 07:15:45.123456789", tz="UTC") @@ -903,7 +903,7 @@ def test_nanosecond_string_parsing(self): assert expected_repr in repr(ts) ts = Timestamp("2013-05-01 07:15:45.123456789", tz="US/Eastern") - assert ts.value == expected_value + 4 * 3600 * 1_000_000_000 + assert ts.value == expected_value + 4 * 3600 * 1000000000 assert expected_repr in repr(ts) # GH 10041 @@ -913,7 +913,7 @@ def test_nanosecond_string_parsing(self): def test_nanosecond_timestamp(self): # GH 7610 - expected = 1_293_840_000_000_000_005 + expected = 1293840000000000005 t = Timestamp("2011-01-01") + offsets.Nano(5) assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" assert t.value == expected @@ -929,7 +929,7 @@ def test_nanosecond_timestamp(self): assert t.value == expected assert t.nanosecond == 5 - expected = 1_293_840_000_000_000_010 + expected = 1293840000000000010 t = t + offsets.Nano(5) assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" assert t.value == expected @@ -949,23 +949,23 @@ def test_nanosecond_timestamp(self): class TestTimestampToJulianDate: def test_compare_1700(self): r = Timestamp("1700-06-23").to_julian_date() - assert r == 2_342_145.5 + assert r == 2342145.5 def test_compare_2000(self): r = Timestamp("2000-04-12").to_julian_date() - assert r == 2_451_646.5 + assert r == 2451646.5 def test_compare_2100(self): r = Timestamp("2100-08-12").to_julian_date() - assert r == 2_488_292.5 + assert r == 2488292.5 def test_compare_hour01(self): r = Timestamp("2000-08-12T01:00:00").to_julian_date() - assert r == 2_451_768.5416666666666666 + assert r == 
2451768.5416666666666666 def test_compare_hour13(self): r = Timestamp("2000-08-12T13:00:00").to_julian_date() - assert r == 2_451_769.0416666666666666 + assert r == 2451769.0416666666666666 class TestTimestampConversion: diff --git a/pandas/tests/series/methods/__init__.py b/pandas/tests/series/methods/__init__.py deleted file mode 100644 index bcb0d30f405e2..0000000000000 --- a/pandas/tests/series/methods/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Test files dedicated to individual (stand-alone) Series methods - -Ideally these files/tests should correspond 1-to-1 with tests.frame.methods - -These may also present opportunities for sharing/de-duplicating test code. -""" diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 148c376eba752..0eb4e8a6cfdf3 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from pandas.compat.numpy import _np_version_under1p18 import pandas.util._test_decorators as td import pandas as pd @@ -124,6 +125,116 @@ def test_argsort_stable(self): with pytest.raises(AssertionError, match=msg): tm.assert_numpy_array_equal(qindexer, mindexer) + def test_cumsum(self, datetime_series): + self._check_accum_op("cumsum", datetime_series) + + def test_cumprod(self, datetime_series): + self._check_accum_op("cumprod", datetime_series) + + def test_cummin(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummin().values, + np.minimum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummin()[1::2] + expected = np.minimum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + def test_cummax(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummax().values, + np.maximum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummax()[1::2] + expected = np.maximum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummin_datetime64(self): + s = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) + ) + + expected = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] + ) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummax_datetime64(self): + s = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) + ) + + expected = pd.Series( + pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] + ) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummin_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 
min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.xfail( + not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" + ) + def test_cummax_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) + def test_np_diff(self): pytest.skip("skipping due to Series no longer being an ndarray") diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/test_asof.py similarity index 100% rename from pandas/tests/series/methods/test_asof.py rename to pandas/tests/series/test_asof.py diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py deleted file mode 100644 index a31cc9d968f3a..0000000000000 --- a/pandas/tests/series/test_cumulative.py +++ /dev/null @@ -1,142 +0,0 @@ -""" -Tests for Series cumulative operations. - -See also --------- -tests.frame.test_cumulative -""" -import numpy as np -import pytest - -from pandas.compat.numpy import _np_version_under1p18 - -import pandas as pd -import pandas.util.testing as tm - - -def _check_accum_op(name, series, check_dtype=True): - func = getattr(np, name) - tm.assert_numpy_array_equal( - func(series).values, func(np.array(series)), check_dtype=check_dtype, - ) - - # with missing values - ts = series.copy() - ts[::2] = np.NaN - - result = func(ts)[1::2] - expected = func(np.array(ts.dropna())) - - tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) - - -class TestSeriesCumulativeOps: - def test_cumsum(self, datetime_series): - _check_accum_op("cumsum", datetime_series) - - def test_cumprod(self, datetime_series): - _check_accum_op("cumprod", datetime_series) - - def test_cummin(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummin().values, - np.minimum.accumulate(np.array(datetime_series)), - ) - ts = datetime_series.copy() - ts[::2] = np.NaN - result = ts.cummin()[1::2] - expected = np.minimum.accumulate(ts.dropna()) - - tm.assert_series_equal(result, expected) - - def test_cummax(self, datetime_series): - tm.assert_numpy_array_equal( - datetime_series.cummax().values, - np.maximum.accumulate(np.array(datetime_series)), - ) - ts = datetime_series.copy() - ts[::2] = np.NaN - result = ts.cummax()[1::2] - expected = np.maximum.accumulate(ts.dropna()) - - tm.assert_series_equal(result, expected) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummin_datetime64(self): - s = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) - ) - - expected = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"]) - ) - result = s.cummin(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] - 
) - ) - result = s.cummin(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummax_datetime64(self): - s = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"]) - ) - - expected = pd.Series( - pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"]) - ) - result = s.cummax(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_datetime( - ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] - ) - ) - result = s.cummax(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummin_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) - ) - result = s.cummin(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) - ) - result = s.cummin(skipna=False) - tm.assert_series_equal(expected, result) - - @pytest.mark.xfail( - not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT" - ) - def test_cummax_timedelta64(self): - s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) - ) - result = s.cummax(skipna=True) - tm.assert_series_equal(expected, result) - - expected = pd.Series( - pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) - ) - result = s.cummax(skipna=False) - tm.assert_series_equal(expected, result) diff --git a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/test_explode.py similarity index 100% rename from pandas/tests/series/methods/test_explode.py rename to pandas/tests/series/test_explode.py diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index b48c79000c98d..9041d582b19ca 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -9,7 +9,7 @@ from pandas import DataFrame, Series import pandas.util.testing as tm -from pandas.io.common import get_handle +from pandas.io.common import _get_handle class TestSeriesToCSV: @@ -143,7 +143,7 @@ def test_to_csv_compression(self, s, encoding, compression): tm.assert_series_equal(s, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + f, _handles = _get_handle( filename, "w", compression=compression, encoding=encoding ) with f: diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/test_quantile.py similarity index 100% rename from pandas/tests/series/methods/test_quantile.py rename to pandas/tests/series/test_quantile.py diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/test_rank.py similarity index 100% rename from pandas/tests/series/methods/test_rank.py rename to pandas/tests/series/test_rank.py diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/test_replace.py similarity index 100% rename from pandas/tests/series/methods/test_replace.py rename to pandas/tests/series/test_replace.py diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ae7ab6addc3fb..2e651c0b35deb 100644 --- 
a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1825,7 +1825,7 @@ def test_extractall_same_as_extract_subject_index(self): def test_empty_str_methods(self): empty_str = empty = Series(dtype=object) - empty_int = Series(dtype="int64") + empty_int = Series(dtype=int) empty_bool = Series(dtype=bool) empty_bytes = Series(dtype=object) @@ -3526,12 +3526,6 @@ def test_string_array(any_string_method): assert result.dtype == "string" result = result.astype(object) - elif expected.dtype == "object" and lib.is_bool_array( - expected.values, skipna=True - ): - assert result.dtype == "boolean" - result = result.astype(object) - elif expected.dtype == "float" and expected.isna().any(): assert result.dtype == "Int64" result = result.astype("float") @@ -3557,19 +3551,3 @@ def test_string_array_numeric_integer_array(method, expected): result = getattr(s.str, method)("a") expected = Series(expected, dtype="Int64") tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "method,expected", - [ - ("isdigit", [False, None, True]), - ("isalpha", [True, None, False]), - ("isalnum", [True, None, True]), - ("isdigit", [False, None, True]), - ], -) -def test_string_array_boolean_array(method, expected): - s = Series(["a", None, "1"], dtype="string") - result = getattr(s.str, method)() - expected = Series(expected, dtype="boolean") - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 6f628bf86829a..458d69c1d3216 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -20,7 +20,6 @@ from pandas._libs.tslibs.offsets import ApplyTypeError import pandas.compat as compat from pandas.compat.numpy import np_datetime64_compat -from pandas.errors import PerformanceWarning from pandas.core.indexes.datetimes import DatetimeIndex, _to_M8, date_range from pandas.core.series import Series @@ -44,10 +43,7 @@ CBMonthBegin, CBMonthEnd, CDay, - CustomBusinessDay, CustomBusinessHour, - CustomBusinessMonthBegin, - CustomBusinessMonthEnd, DateOffset, Day, Easter, @@ -611,46 +607,6 @@ def test_add(self, offset_types, tz_naive_fixture): assert isinstance(result, Timestamp) assert result == expected_localize - def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): - # GH#12724, GH#30336 - offset_s = self._get_offset(offset_types) - - dti = DatetimeIndex([], tz=tz_naive_fixture) - - warn = None - if isinstance( - offset_s, - ( - Easter, - WeekOfMonth, - LastWeekOfMonth, - CustomBusinessDay, - BusinessHour, - CustomBusinessHour, - CustomBusinessMonthBegin, - CustomBusinessMonthEnd, - FY5253, - FY5253Quarter, - ), - ): - # We don't have an optimized apply_index - warn = PerformanceWarning - - with tm.assert_produces_warning(warn): - result = dti + offset_s - tm.assert_index_equal(result, dti) - with tm.assert_produces_warning(warn): - result = offset_s + dti - tm.assert_index_equal(result, dti) - - dta = dti._data - with tm.assert_produces_warning(warn): - result = dta + offset_s - tm.assert_equal(result, dta) - with tm.assert_produces_warning(warn): - result = offset_s + dta - tm.assert_equal(result, dta) - def test_pickle_v0_15_2(self, datapath): offsets = { "DateOffset": DateOffset(years=1), diff --git a/setup.cfg b/setup.cfg index c7d3394568f9c..62d9f2e6056bb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -122,10 +122,6 @@ skip = pandas/__init__.py,pandas/core/api.py [mypy] ignore_missing_imports=True no_implicit_optional=True 
-check_untyped_defs=True - -[mypy-pandas.tests.*] -check_untyped_defs=False [mypy-pandas.conftest] ignore_errors=True @@ -147,228 +143,3 @@ ignore_errors=True [mypy-pandas.tests.scalar.period.test_period] ignore_errors=True - -[mypy-pandas._version] -check_untyped_defs=False - -[mypy-pandas.core.arrays.boolean] -check_untyped_defs=False - -[mypy-pandas.core.arrays.categorical] -check_untyped_defs=False - -[mypy-pandas.core.arrays.integer] -check_untyped_defs=False - -[mypy-pandas.core.arrays.interval] -check_untyped_defs=False - -[mypy-pandas.core.arrays.sparse.array] -check_untyped_defs=False - -[mypy-pandas.core.base] -check_untyped_defs=False - -[mypy-pandas.core.computation.align] -check_untyped_defs=False - -[mypy-pandas.core.computation.eval] -check_untyped_defs=False - -[mypy-pandas.core.computation.expr] -check_untyped_defs=False - -[mypy-pandas.core.computation.expressions] -check_untyped_defs=False - -[mypy-pandas.core.computation.ops] -check_untyped_defs=False - -[mypy-pandas.core.computation.pytables] -check_untyped_defs=False - -[mypy-pandas.core.computation.scope] -check_untyped_defs=False - -[mypy-pandas.core.config_init] -check_untyped_defs=False - -[mypy-pandas.core.dtypes.cast] -check_untyped_defs=False - -[mypy-pandas.core.dtypes.generic] -check_untyped_defs=False - -[mypy-pandas.core.frame] -check_untyped_defs=False - -[mypy-pandas.core.generic] -check_untyped_defs=False - -[mypy-pandas.core.groupby.generic] -check_untyped_defs=False - -[mypy-pandas.core.groupby.grouper] -check_untyped_defs=False - -[mypy-pandas.core.groupby.ops] -check_untyped_defs=False - -[mypy-pandas.core.indexes.base] -check_untyped_defs=False - -[mypy-pandas.core.indexes.category] -check_untyped_defs=False - -[mypy-pandas.core.indexes.datetimelike] -check_untyped_defs=False - -[mypy-pandas.core.indexes.datetimes] -check_untyped_defs=False - -[mypy-pandas.core.indexes.interval] -check_untyped_defs=False - -[mypy-pandas.core.indexes.multi] -check_untyped_defs=False - -[mypy-pandas.core.indexes.timedeltas] -check_untyped_defs=False - -[mypy-pandas.core.indexing] -check_untyped_defs=False - -[mypy-pandas.core.internals.blocks] -check_untyped_defs=False - -[mypy-pandas.core.internals.concat] -check_untyped_defs=False - -[mypy-pandas.core.internals.construction] -check_untyped_defs=False - -[mypy-pandas.core.internals.managers] -check_untyped_defs=False - -[mypy-pandas.core.missing] -check_untyped_defs=False - -[mypy-pandas.core.nanops] -check_untyped_defs=False - -[mypy-pandas.core.ops.docstrings] -check_untyped_defs=False - -[mypy-pandas.core.resample] -check_untyped_defs=False - -[mypy-pandas.core.reshape.merge] -check_untyped_defs=False - -[mypy-pandas.core.reshape.reshape] -check_untyped_defs=False - -[mypy-pandas.core.series] -check_untyped_defs=False - -[mypy-pandas.core.strings] -check_untyped_defs=False - -[mypy-pandas.core.tools.datetimes] -check_untyped_defs=False - -[mypy-pandas.core.window.common] -check_untyped_defs=False - -[mypy-pandas.core.window.ewm] -check_untyped_defs=False - -[mypy-pandas.core.window.expanding] -check_untyped_defs=False - -[mypy-pandas.core.window.rolling] -check_untyped_defs=False - -[mypy-pandas.io.clipboard] -check_untyped_defs=False - -[mypy-pandas.io.excel._base] -check_untyped_defs=False - -[mypy-pandas.io.excel._openpyxl] -check_untyped_defs=False - -[mypy-pandas.io.excel._util] -check_untyped_defs=False - -[mypy-pandas.io.excel._xlwt] -check_untyped_defs=False - -[mypy-pandas.io.formats.console] -check_untyped_defs=False - -[mypy-pandas.io.formats.css] 
-check_untyped_defs=False - -[mypy-pandas.io.formats.excel] -check_untyped_defs=False - -[mypy-pandas.io.formats.format] -check_untyped_defs=False - -[mypy-pandas.io.formats.style] -check_untyped_defs=False - -[mypy-pandas.io.html] -check_untyped_defs=False - -[mypy-pandas.io.json._json] -check_untyped_defs=False - -[mypy-pandas.io.json._normalize] -check_untyped_defs=False - -[mypy-pandas.io.json._table_schema] -check_untyped_defs=False - -[mypy-pandas.io.parsers] -check_untyped_defs=False - -[mypy-pandas.io.pytables] -check_untyped_defs=False - -[mypy-pandas.io.sas.sas_xport] -check_untyped_defs=False - -[mypy-pandas.io.sas.sas7bdat] -check_untyped_defs=False - -[mypy-pandas.io.sas.sasreader] -check_untyped_defs=False - -[mypy-pandas.io.sql] -check_untyped_defs=False - -[mypy-pandas.io.stata] -check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.converter] -check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.core] -check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.misc] -check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.timeseries] -check_untyped_defs=False - -[mypy-pandas.tseries.holiday] -check_untyped_defs=False - -[mypy-pandas.tseries.offsets] -check_untyped_defs=False - -[mypy-pandas.util.testing] -check_untyped_defs=False diff --git a/setup.py b/setup.py index c6b078dae280a..45f3af3d5c374 100755 --- a/setup.py +++ b/setup.py @@ -63,15 +63,24 @@ def is_platform_mac(): from distutils.extension import Extension # noqa: E402 isort:skip from distutils.command.build import build # noqa: E402 isort:skip -if _CYTHON_INSTALLED: +try: + if not _CYTHON_INSTALLED: + raise ImportError("No supported version of Cython installed.") from Cython.Distutils.old_build_ext import old_build_ext as _build_ext cython = True - from Cython import Tempita as tempita -else: +except ImportError: from distutils.command.build_ext import build_ext as _build_ext cython = False +else: + try: + try: + from Cython import Tempita as tempita + except ImportError: + import tempita + except ImportError: + raise ImportError("Building pandas requires Tempita: pip install Tempita") _pxi_dep_template = { From 16217f0099e640498a5caaaceedc4b972f27d1bc Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Mon, 23 Dec 2019 00:08:24 +0100 Subject: [PATCH 36/37] fix tm.assert_frame_equal. use naming conventions. --- pandas/tests/io/json/test_pandas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index cce9a52e5077d..9000b86dd4167 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1606,9 +1606,9 @@ def test_json_negative_indent_raises(self): @pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") def test_deprecate_numpy_argument_read_json(self): # https://github.com/pandas-dev/pandas/issues/28512 - df = DataFrame([1, 2, 3]) + expected = DataFrame([1, 2, 3]) with tm.assert_produces_warning(None): with catch_warnings(): filterwarnings("ignore", category=FutureWarning) - result = read_json(df.to_json(), numpy=True) - assert_frame_equal(result, df) + result = read_json(expected.to_json(), numpy=True) + tm.assert_frame_equal(result, expected) From 640f729cdccb3bc5b69ab136ca381c2bebe752be Mon Sep 17 00:00:00 2001 From: Luca Ionescu Date: Mon, 23 Dec 2019 00:54:32 +0100 Subject: [PATCH 37/37] sort imports correctly. 
--- pandas/io/json/_json.py | 2 +- pandas/tests/io/json/test_pandas.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f73a314d4da29..7e43a0eaca3e0 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -10,6 +10,7 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT from pandas.errors import AbstractMethodError +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import ensure_str, is_period_dtype @@ -27,7 +28,6 @@ ) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import _validate_integer -from pandas.util._decorators import deprecate_kwarg from ._normalize import convert_to_line_delimits from ._table_schema import build_table_schema, parse_table_schema diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9000b86dd4167..ff18febca44d6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,9 +1,9 @@ from collections import OrderedDict from datetime import timedelta from io import StringIO -from warnings import catch_warnings, filterwarnings import json import os +from warnings import catch_warnings, filterwarnings import numpy as np import pytest
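
For reference, a minimal end-to-end sketch of the behaviour the final test asserts, assuming a checkout with this series applied (pandas 1.0.x, where read_json still accepts the deprecated ``numpy`` keyword; it was removed in later versions). The frame and the round trip mirror test_deprecate_numpy_argument_read_json; the explicit warning capture is added here purely for illustration and is not part of the patch:

    import warnings

    import pandas as pd

    expected = pd.DataFrame([1, 2, 3])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # The deprecated keyword still round-trips the data ...
        result = pd.read_json(expected.to_json(), numpy=True)

    # ... but using it now emits a FutureWarning.
    assert any(issubclass(w.category, FutureWarning) for w in caught)
    pd.testing.assert_frame_equal(result, expected)

The FutureWarning comes from the ``deprecate_kwarg`` decorator whose import this last patch merely reorders in pandas/io/json/_json.py; the data path of read_json itself is untouched.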