From 8a3360cbf20a0a283009def9e53726aa8f42fc3a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Jan 2022 00:38:44 +0100 Subject: [PATCH] Backport PR #44839: REGR: allow reindexing datetimelike with upcast / raise deprecation warning --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/array_algos/take.py | 6 ++++++ pandas/core/dtypes/cast.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 7 +++++++ pandas/tests/series/methods/test_reindex.py | 20 ++++++++++++++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2a77b755d5076..2c501fa9e05f2 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -784,6 +784,7 @@ Datetimelike - Bug in :meth:`Index.insert` for inserting ``np.datetime64``, ``np.timedelta64`` or ``tuple`` into :class:`Index` with ``dtype='object'`` with negative loc adding ``None`` and replacing existing value (:issue:`44509`) - Bug in :meth:`Timestamp.to_pydatetime` failing to retain the ``fold`` attribute (:issue:`45087`) - Bug in :meth:`Series.mode` with ``DatetimeTZDtype`` incorrectly returning timezone-naive and ``PeriodDtype`` incorrectly raising (:issue:`41927`) +- Fixed regression in :meth:`~Series.reindex` raising an error when using an incompatible fill value with a datetime-like dtype (or not raising a deprecation warning for using a ``datetime.date`` as fill value) (:issue:`42921`) - Bug in :class:`DateOffset`` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`) - Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`) - Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index c4b8f833f4ad3..188725f003f1e 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -94,6 +94,12 @@ def take_nd( """ if fill_value is lib.no_default: fill_value = na_value_for_dtype(arr.dtype, compat=False) + elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM": + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if arr.dtype != dtype: + # EA.take is strict about returning a new object of the same type + # so for that case cast upfront + arr = arr.astype(dtype) if not isinstance(arr, np.ndarray): # i.e. ExtensionArray, diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e21e30e0744bd..03fe4c00aa28d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -575,7 +575,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): except (ValueError, TypeError): pass else: - if fv.tz is None: + if isna(fv) or fv.tz is None: return dtype, fv.asm8 return np.dtype("object"), fill_value diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 6eee1c57287e2..8575e7895ae5a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -8,6 +8,7 @@ import pytest from pandas._libs.tslibs.timezones import dateutil_gettz as gettz +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -133,6 +134,7 @@ def test_reindex_copies(self): result2 = df.reindex(columns=cols, index=df.index, copy=True) assert not np.shares_memory(result2[0]._values, df[0]._values) + @td.skip_array_manager_not_yet_implemented def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) @@ -149,6 +151,11 @@ def test_reindex_date_fill_value(self): ) tm.assert_frame_equal(res, expected) + # only reindexing rows + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), fill_value=fv) + tm.assert_frame_equal(res, expected[["A", "B"]]) + # same with a datetime-castable str res = df.reindex( index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01" diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index a500252a44e07..e0f1491f49485 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -9,6 +9,8 @@ Period, PeriodIndex, Series, + Timedelta, + Timestamp, date_range, isna, ) @@ -296,6 +298,24 @@ def test_reindex_fill_value(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) +@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)]) +def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager): + # https://github.com/pandas-dev/pandas/issues/42921 + if using_array_manager: + pytest.skip("Array manager does not promote dtype, hence we fail") + + if dtype == "timedelta64[ns]" and fill_value == Timedelta(0): + # use the scalar that is not compatible with the dtype for this test + fill_value = Timestamp(0) + + ser = Series([NaT], dtype=dtype) + + result = ser.reindex([0, 1], fill_value=fill_value) + expected = Series([None, fill_value], index=[0, 1], dtype=object) + tm.assert_series_equal(result, expected) + + def test_reindex_datetimeindexes_tz_naive_and_aware(): # GH 8306 idx = date_range("20131101", tz="America/Chicago", periods=7)