diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 3b40ad8caf372..0b450fab53137 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -275,7 +275,9 @@ Other enhancements - :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`) - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) +- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) +- .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 599ddfec5a268..47143b32d6dbe 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -30,12 +30,14 @@ "get_unit_from_dtype", "periods_per_day", "periods_per_second", + "is_supported_unit", ] from pandas._libs.tslibs import dtypes from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.dtypes import ( Resolution, + is_supported_unit, periods_per_day, periods_per_second, ) diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi index f8c6a36a63c7e..dd439ebfc4798 100644 --- a/pandas/_libs/tslibs/dtypes.pyi +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -7,6 +7,7 @@ _period_code_map: dict[str, int] def periods_per_day(reso: int) -> int: ... def periods_per_second(reso: int) -> int: ... +def is_supported_unit(reso: int) -> bool: ... class PeriodDtypeBase: _dtype_code: int # PeriodDtypeCode diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 6cbe31cb1dc16..01616666bba3f 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum): NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC +def is_supported_unit(NPY_DATETIMEUNIT reso): + return ( + reso == NPY_DATETIMEUNIT.NPY_FR_ns + or reso == NPY_DATETIMEUNIT.NPY_FR_us + or reso == NPY_DATETIMEUNIT.NPY_FR_ms + or reso == NPY_DATETIMEUNIT.NPY_FR_s + ) + + cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # generic -> default to nanoseconds diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ccb20b1dfaf1a..c9f5946c30c8c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -31,6 +31,7 @@ get_unit_from_dtype, ints_to_pydatetime, is_date_array_normalized, + is_supported_unit, is_unitless, 
normalize_i8_timestamps, timezones, @@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True): return self.copy() return self + elif ( + self.tz is None + and is_datetime64_dtype(dtype) + and not is_unitless(dtype) + and is_supported_unit(get_unit_from_dtype(dtype)) + ): + # unit conversion e.g. datetime64[s] + res_values = astype_overflowsafe(self._ndarray, dtype, copy=True) + return type(self)._simple_new(res_values, dtype=res_values.dtype) + # TODO: preserve freq? + elif is_datetime64_ns_dtype(dtype): return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) - elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype: - # unit conversion e.g. datetime64[s] - return self._ndarray.astype(dtype) + elif self.tz is not None and isinstance(dtype, DatetimeTZDtype): + # tzaware unit conversion e.g. datetime64[s, UTC] + np_dtype = np.dtype(dtype.str) + res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) + return type(self)._simple_new(res_values, dtype=dtype) + # TODO: preserve freq? 
elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 8d1427976276c..7fb58468746a8 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -15,6 +15,7 @@ import numpy as np from pandas._libs import lib +from pandas._libs.tslibs import is_unitless from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( ArrayLike, @@ -280,6 +281,20 @@ def astype_array_safe( # Ensure we don't end up with a PandasArray dtype = dtype.numpy_dtype + if ( + is_datetime64_dtype(values.dtype) + # need to do np.dtype check instead of is_datetime64_dtype + # otherwise pyright complains + and isinstance(dtype, np.dtype) + and dtype.kind == "M" + and not is_unitless(dtype) + and not is_dtype_equal(dtype, values.dtype) + ): + # unit conversion, we would re-cast to nanosecond, so this is + # effectively just a copy (regardless of copy kwd) + # TODO(2.0): remove special-case + return values.copy() + try: new_values = astype_array(values, dtype, copy=copy) except (ValueError, TypeError): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a192337daf59b..519dfd9269df5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: tipo = get_dtype(arr_or_dtype.dtype) else: return False - return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE + return tipo == DT64NS_DTYPE or ( + isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns" + ) def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0393c9d07cc74..87f2ae41cc98e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True): # Ensure that self.astype(self.dtype) is self return self.copy() if copy else self - if ( - 
self.dtype == np.dtype("M8[ns]") - and isinstance(dtype, np.dtype) - and dtype.kind == "M" - and dtype != np.dtype("M8[ns]") - ): - # For now DatetimeArray supports this by unwrapping ndarray, - # but DatetimeIndex doesn't - raise TypeError(f"Cannot cast {type(self).__name__} to dtype") - values = self._data if isinstance(values, ExtensionArray): with rewrite_exception(type(values).__name__, type(self).__name__): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c1009d63ab8cc..18c0d56abbeb4 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -48,6 +48,7 @@ from pandas.core.dtypes.common import ( is_datetime64_dtype, is_datetime64tz_dtype, + is_dtype_equal, is_scalar, ) from pandas.core.dtypes.missing import is_valid_na_for_dtype @@ -338,6 +339,18 @@ def __new__( if copy: data = data.copy() return cls._simple_new(data, name=name) + elif ( + isinstance(data, DatetimeArray) + and freq is lib.no_default + and tz is None + and is_dtype_equal(data.dtype, dtype) + ): + # Reached via Index.__new__ when we call .astype + # TODO(2.0): special casing can be removed once _from_sequence_not_strict + # no longer chokes on non-nano + if copy: + data = data.copy() + return cls._simple_new(data, name=name) dtarr = DatetimeArray._from_sequence_not_strict( data, diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 6c6a8b269aee8..63601ff963609 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op): class TestDatetimeArray: + def test_astype_non_nano_tznaive(self): + dti = pd.date_range("2016-01-01", periods=3) + + res = dti.astype("M8[s]") + assert res.dtype == "M8[s]" + + dta = dti._data + res = dta.astype("M8[s]") + assert res.dtype == "M8[s]" + assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray + + def 
test_astype_non_nano_tzaware(self): +        dti = pd.date_range("2016-01-01", periods=3, tz="UTC") + +        res = dti.astype("M8[s, US/Pacific]") +        assert res.dtype == "M8[s, US/Pacific]" + +        dta = dti._data +        res = dta.astype("M8[s, US/Pacific]") +        assert res.dtype == "M8[s, US/Pacific]" + +        # from non-nano to non-nano, preserving reso +        res2 = res.astype("M8[s, UTC]") +        assert res2.dtype == "M8[s, UTC]" +        assert not tm.shares_memory(res2, res) + +        res3 = res.astype("M8[s, UTC]", copy=False) +        assert res3.dtype == "M8[s, UTC]" +        assert tm.shares_memory(res3, res) +     def test_astype_to_same(self): arr = DatetimeArray._from_sequence( ["2000"], dtype=DatetimeTZDtype(tz="US/Central") diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c5d0567b6dfc0..92b99ba6d1fe2 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype(): pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")) ) + + # non-nano dt64tz + assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern")) + def test_is_timedelta64_ns_dtype(): assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]")) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 5891c28c11a68..2d195fad83644 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -55,6 +55,7 @@ def test_namespace(): "get_unit_from_dtype", "periods_per_day", "periods_per_second", + "is_supported_unit", ] expected = set(submodules + api)