Skip to content

API: .astype td64->td64 give requested dtype #48963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Other API changes
- Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`)
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`TimedeltaIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
-

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
Tick,
Timedelta,
astype_overflowsafe,
get_unit_from_dtype,
iNaT,
is_supported_unit,
periods_per_second,
to_offset,
)
Expand Down Expand Up @@ -304,6 +306,18 @@ def astype(self, dtype, copy: bool = True):
dtype = pandas_dtype(dtype)

if dtype.kind == "m":
if dtype == self.dtype:
if copy:
return self.copy()
return self

if is_supported_unit(get_unit_from_dtype(dtype)):
# unit conversion e.g. timedelta64[s]
res_values = astype_overflowsafe(self._ndarray, dtype, copy=False)
return type(self)._simple_new(
res_values, dtype=res_values.dtype, freq=self.freq
)

return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy)

return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ def astype_nansafe(
return arr.view(dtype)

elif dtype.kind == "m":
# TODO(2.0): change to use the same logic as TDA.astype, i.e.
# giving the requested dtype for supported units (s, ms, us, ns)
# and doing the old convert-to-float behavior otherwise.
if is_supported_unit(get_unit_from_dtype(arr.dtype)):
from pandas.core.construction import ensure_wrapped_if_datetimelike

arr = ensure_wrapped_if_datetimelike(arr)
return arr.astype(dtype, copy=copy)
return astype_td64_unit_conversion(arr, dtype, copy=copy)

raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/frame/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,12 +480,19 @@ def test_astype_to_timedelta_unit_ns(self, unit):
@pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"])
def test_astype_to_timedelta_unit(self, unit):
# coerce to float
# GH#19223
# GH#19223 until 2.0 used to coerce to float
dtype = f"m8[{unit}]"
arr = np.array([[1, 2, 3]], dtype=dtype)
df = DataFrame(arr)
result = df.astype(dtype)
expected = DataFrame(df.values.astype(dtype).astype(float))

if unit in ["m", "h", "D"]:
# We don't support these, so we use the old logic to convert to float
expected = DataFrame(df.values.astype(dtype).astype(float))
else:
tda = pd.core.arrays.TimedeltaArray._simple_new(arr, dtype=arr.dtype)
expected = DataFrame(tda)
assert (expected.dtypes == dtype).all()

tm.assert_frame_equal(result, expected)

Expand Down
22 changes: 18 additions & 4 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import TimedeltaArray
from pandas.core.indexes.api import Int64Index
from pandas.tests.indexes.datetimelike import DatetimeLike

Expand Down Expand Up @@ -101,19 +102,26 @@ def test_fields(self):
assert rng.days.name == "name"

def test_freq_conversion_always_floating(self):
# even if we have no NaTs, we get back float64; this matches TDA and Series
# pre-2.0 td64 astype converted to float64. now for supported units
# (s, ms, us, ns) this converts to the requested dtype.
# This matches TDA and Series
tdi = timedelta_range("1 Day", periods=30)

res = tdi.astype("m8[s]")
expected = Index((tdi.view("i8") / 10**9).astype(np.float64))
exp_values = np.asarray(tdi).astype("m8[s]")
exp_tda = TimedeltaArray._simple_new(
exp_values, dtype=exp_values.dtype, freq=tdi.freq
)
expected = Index(exp_tda)
assert expected.dtype == "m8[s]"
tm.assert_index_equal(res, expected)

# check this matches Series and TimedeltaArray
res = tdi._data.astype("m8[s]")
tm.assert_numpy_array_equal(res, expected._values)
tm.assert_equal(res, expected._values)

res = tdi.to_series().astype("m8[s]")
tm.assert_numpy_array_equal(res._values, expected._values)
tm.assert_equal(res._values, expected._values._with_freq(None))

def test_freq_conversion(self, index_or_series):

Expand All @@ -131,6 +139,8 @@ def test_freq_conversion(self, index_or_series):
)
tm.assert_equal(result, expected)

# We don't support "D" reso, so we use the pre-2.0 behavior
# casting to float64
result = td.astype("timedelta64[D]")
expected = index_or_series([31, 31, 31, np.nan])
tm.assert_equal(result, expected)
Expand All @@ -141,5 +151,9 @@ def test_freq_conversion(self, index_or_series):
)
tm.assert_equal(result, expected)

exp_values = np.asarray(td).astype("m8[s]")
exp_tda = TimedeltaArray._simple_new(exp_values, dtype=exp_values.dtype)
expected = index_or_series(exp_tda)
assert expected.dtype == "m8[s]"
result = td.astype("timedelta64[s]")
tm.assert_equal(result, expected)
6 changes: 3 additions & 3 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,14 +649,14 @@ def test_loc_setitem_consistency_slice_column_len(self):
)

with tm.assert_produces_warning(None, match=msg):
# timedelta64[s] -> float64, so this cannot be done inplace, so
# timedelta64[m] -> float64, so this cannot be done inplace, so
# no warning
df.loc[:, ("Respondent", "Duration")] = df.loc[
:, ("Respondent", "Duration")
].astype("timedelta64[s]")
].astype("timedelta64[m]")

expected = Series(
[1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration")
[23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
)
tm.assert_series_equal(df[("Respondent", "Duration")], expected)

Expand Down