Skip to content

Commit

Permalink
ENH: DTI/DTA.astype support non-nano (#47579)
Browse files Browse the repository at this point in the history
* ENH: DTI/DTA.astype support non-nano

* whatsnew

* GH ref

* pyright fixup
  • Loading branch information
jbrockmendel authored Jul 5, 2022
1 parent 700ef33 commit 67e8c4c
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 14 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,9 @@ Other enhancements
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_150.notable_bug_fixes:
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
]

from pandas._libs.tslibs import dtypes
from pandas._libs.tslibs.conversion import localize_pydatetime
from pandas._libs.tslibs.dtypes import (
Resolution,
is_supported_unit,
periods_per_day,
periods_per_second,
)
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ _period_code_map: dict[str, int]

# Type stubs for resolution helpers exposed by pandas._libs.tslibs.dtypes.
# Each takes the resolution code ``reso`` as an int.
def periods_per_day(reso: int) -> int: ...
def periods_per_second(reso: int) -> int: ...
# Implemented in dtypes.pyx: True when ``reso`` is the ns, us, ms or s unit.
def is_supported_unit(reso: int) -> bool: ...

class PeriodDtypeBase:
_dtype_code: int # PeriodDtypeCode
Expand Down
9 changes: 9 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):
NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC


def is_supported_unit(NPY_DATETIMEUNIT reso):
    """
    Check whether ``reso`` is one of the ns, us, ms or s resolutions.

    Parameters
    ----------
    reso : NPY_DATETIMEUNIT

    Returns
    -------
    bool
        True for NPY_FR_ns, NPY_FR_us, NPY_FR_ms and NPY_FR_s; False for
        every other unit.
    """
    # Same order of checks as a chained ``or``: finest resolution first.
    if reso == NPY_DATETIMEUNIT.NPY_FR_ns:
        return True
    if reso == NPY_DATETIMEUNIT.NPY_FR_us:
        return True
    if reso == NPY_DATETIMEUNIT.NPY_FR_ms:
        return True
    return reso == NPY_DATETIMEUNIT.NPY_FR_s


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# generic -> default to nanoseconds
Expand Down
21 changes: 18 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
get_unit_from_dtype,
ints_to_pydatetime,
is_date_array_normalized,
is_supported_unit,
is_unitless,
normalize_i8_timestamps,
timezones,
Expand Down Expand Up @@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True):
return self.copy()
return self

elif (
self.tz is None
and is_datetime64_dtype(dtype)
and not is_unitless(dtype)
and is_supported_unit(get_unit_from_dtype(dtype))
):
# unit conversion e.g. datetime64[s]
res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
return type(self)._simple_new(res_values, dtype=res_values.dtype)
# TODO: preserve freq?

elif is_datetime64_ns_dtype(dtype):
return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
# unit conversion e.g. datetime64[s]
return self._ndarray.astype(dtype)
elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
# tzaware unit conversion e.g. datetime64[s, UTC]
np_dtype = np.dtype(dtype.str)
res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
return type(self)._simple_new(res_values, dtype=dtype)
# TODO: preserve freq?

elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
Expand Down
15 changes: 15 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import is_unitless
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._typing import (
ArrayLike,
Expand Down Expand Up @@ -280,6 +281,20 @@ def astype_array_safe(
# Ensure we don't end up with a PandasArray
dtype = dtype.numpy_dtype

if (
is_datetime64_dtype(values.dtype)
# need to do np.dtype check instead of is_datetime64_dtype
# otherwise pyright complains
and isinstance(dtype, np.dtype)
and dtype.kind == "M"
and not is_unitless(dtype)
and not is_dtype_equal(dtype, values.dtype)
):
# unit conversion, we would re-cast to nanosecond, so this is
# effectively just a copy (regardless of copy kwd)
# TODO(2.0): remove special-case
return values.copy()

try:
new_values = astype_array(values, dtype, copy=copy)
except (ValueError, TypeError):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
tipo = get_dtype(arr_or_dtype.dtype)
else:
return False
return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE
return tipo == DT64NS_DTYPE or (
isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"
)


def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):
# Ensure that self.astype(self.dtype) is self
return self.copy() if copy else self

if (
self.dtype == np.dtype("M8[ns]")
and isinstance(dtype, np.dtype)
and dtype.kind == "M"
and dtype != np.dtype("M8[ns]")
):
# For now DatetimeArray supports this by unwrapping ndarray,
# but DatetimeIndex doesn't
raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

values = self._data
if isinstance(values, ExtensionArray):
with rewrite_exception(type(values).__name__, type(self).__name__):
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_scalar,
)
from pandas.core.dtypes.missing import is_valid_na_for_dtype
Expand Down Expand Up @@ -338,6 +339,18 @@ def __new__(
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
elif (
isinstance(data, DatetimeArray)
and freq is lib.no_default
and tz is None
and is_dtype_equal(data.dtype, dtype)
):
# Reached via Index.__new__ when we call .astype
# TODO(2.0): special casing can be removed once _from_sequence_not_strict
# no longer chokes on non-nano
if copy:
data = data.copy()
return cls._simple_new(data, name=name)

dtarr = DatetimeArray._from_sequence_not_strict(
data,
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):


class TestDatetimeArray:
def test_astype_non_nano_tznaive(self):
dti = pd.date_range("2016-01-01", periods=3)

res = dti.astype("M8[s]")
assert res.dtype == "M8[s]"

dta = dti._data
res = dta.astype("M8[s]")
assert res.dtype == "M8[s]"
assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray

def test_astype_non_nano_tzaware(self):
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

res = dti.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"

dta = dti._data
res = dta.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"

# from non-nano to non-nano, preserving reso
res2 = res.astype("M8[s, UTC]")
assert res2.dtype == "M8[s, UTC]"
assert not tm.shares_memory(res2, res)

res3 = res.astype("M8[s, UTC]", copy=False)
assert res2.dtype == "M8[s, UTC]"
assert tm.shares_memory(res3, res)

def test_astype_to_same(self):
arr = DatetimeArray._from_sequence(
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype():
pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]"))
)

# non-nano dt64tz
assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern"))


def test_is_timedelta64_ns_dtype():
assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]"))
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/tslibs/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_namespace():
"get_unit_from_dtype",
"periods_per_day",
"periods_per_second",
"is_supported_unit",
]

expected = set(submodules + api)
Expand Down

0 comments on commit 67e8c4c

Please sign in to comment.