diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 0bc91d3cd9637..ffba0233b3106 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -118,6 +118,7 @@ Other API changes - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) +- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4c3e790c2879b..224477fa711b4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -20,7 +20,9 @@ Tick, Timedelta, astype_overflowsafe, + get_unit_from_dtype, iNaT, + is_supported_unit, periods_per_second, to_offset, ) @@ -304,6 +306,18 @@ def astype(self, dtype, copy: bool = True): dtype = pandas_dtype(dtype) if dtype.kind == "m": + if dtype == self.dtype: + if copy: + return self.copy() + return self + + if is_supported_unit(get_unit_from_dtype(dtype)): + # unit conversion e.g. timedelta64[s] + res_values = astype_overflowsafe(self._ndarray, dtype, copy=False) + return type(self)._simple_new( + res_values, dtype=res_values.dtype, freq=self.freq + ) + return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy) return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 892b53a261b26..d495a0614b227 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -136,6 +136,14 @@ def astype_nansafe( return arr.view(dtype) elif dtype.kind == "m": + # TODO(2.0): change to use the same logic as TDA.astype, i.e. + # giving the requested dtype for supported units (s, ms, us, ns) + # and doing the old convert-to-float behavior otherwise. + if is_supported_unit(get_unit_from_dtype(arr.dtype)): + from pandas.core.construction import ensure_wrapped_if_datetimelike + + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) return astype_td64_unit_conversion(arr, dtype, copy=copy) raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 7e7ce4321fb64..28c264d5c7335 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -480,12 +480,19 @@ def test_astype_to_timedelta_unit_ns(self, unit): @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) def test_astype_to_timedelta_unit(self, unit): # coerce to float - # GH#19223 + # GH#19223 until 2.0 used to coerce to float dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) - expected = DataFrame(df.values.astype(dtype).astype(float)) + + if unit in ["m", "h", "D"]: + # We don't support these, so we use the old logic to convert to float + expected = DataFrame(df.values.astype(dtype).astype(float)) + else: + tda = pd.core.arrays.TimedeltaArray._simple_new(arr, dtype=arr.dtype) + expected = DataFrame(tda) + assert (expected.dtypes == dtype).all() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 6904a847b04ed..701d737535116 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -13,6 +13,7 @@ timedelta_range, ) import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray from pandas.core.indexes.api import Int64Index from pandas.tests.indexes.datetimelike import DatetimeLike @@ -101,19 +102,26 @@ def test_fields(self): assert rng.days.name == "name" def test_freq_conversion_always_floating(self): - # even if we have no NaTs, we get back float64; this matches TDA and Series + # pre-2.0 td64 astype converted to float64. now for supported units + # (s, ms, us, ns) this converts to the requested dtype. + # This matches TDA and Series tdi = timedelta_range("1 Day", periods=30) res = tdi.astype("m8[s]") - expected = Index((tdi.view("i8") / 10**9).astype(np.float64)) + exp_values = np.asarray(tdi).astype("m8[s]") + exp_tda = TimedeltaArray._simple_new( + exp_values, dtype=exp_values.dtype, freq=tdi.freq + ) + expected = Index(exp_tda) + assert expected.dtype == "m8[s]" tm.assert_index_equal(res, expected) # check this matches Series and TimedeltaArray res = tdi._data.astype("m8[s]") - tm.assert_numpy_array_equal(res, expected._values) + tm.assert_equal(res, expected._values) res = tdi.to_series().astype("m8[s]") - tm.assert_numpy_array_equal(res._values, expected._values) + tm.assert_equal(res._values, expected._values._with_freq(None)) def test_freq_conversion(self, index_or_series): @@ -131,6 +139,8 @@ def test_freq_conversion(self, index_or_series): ) tm.assert_equal(result, expected) + # We don't support "D" reso, so we use the pre-2.0 behavior + # casting to float64 result = td.astype("timedelta64[D]") expected = index_or_series([31, 31, 31, np.nan]) tm.assert_equal(result, expected) @@ -141,5 +151,9 @@ def test_freq_conversion(self, index_or_series): ) tm.assert_equal(result, expected) + exp_values = np.asarray(td).astype("m8[s]") + exp_tda = TimedeltaArray._simple_new(exp_values, dtype=exp_values.dtype) + expected = index_or_series(exp_tda) + assert expected.dtype == "m8[s]" result = td.astype("timedelta64[s]") tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e62fb98b0782d..1a17d9df1396d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -649,14 +649,14 @@ def test_loc_setitem_consistency_slice_column_len(self): ) with tm.assert_produces_warning(None, match=msg): - # timedelta64[s] -> float64, so this cannot be done inplace, so + # timedelta64[m] -> float64, so this cannot be done inplace, so # no warning df.loc[:, ("Respondent", "Duration")] = df.loc[ :, ("Respondent", "Duration") - ].astype("timedelta64[s]") + ].astype("timedelta64[m]") expected = Series( - [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") + [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration") ) tm.assert_series_equal(df[("Respondent", "Duration")], expected)