diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4ac737bb6b29a..66f95d0c76e51 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -119,6 +119,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ +- :func:`factorize` and :func:`unique` now preserve the original dtype when passed NumPy ``timedelta64`` or ``datetime64`` arrays with non-nanosecond resolution, instead of casting to nanoseconds (:issue:`48670`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1988753de3d01..3d9da31383178 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -40,7 +40,6 @@ from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, infer_dtype_from_array, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -50,7 +49,6 @@ is_bool_dtype, is_categorical_dtype, is_complex_dtype, - is_datetime64_dtype, is_extension_array_dtype, is_float_dtype, is_integer, @@ -60,7 +58,6 @@ is_object_dtype, is_scalar, is_signed_integer_dtype, - is_timedelta64_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -183,8 +180,6 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: # datetimelike elif needs_i8_conversion(values.dtype): - if isinstance(values, np.ndarray): - values = sanitize_to_nanoseconds(values) npvalues = values.view("i8") npvalues = cast(np.ndarray, npvalues) return npvalues @@ -222,11 +217,6 @@ def _reconstruct_data( values = cls._from_sequence(values, dtype=dtype) else: - if 
is_datetime64_dtype(dtype): - dtype = np.dtype("datetime64[ns]") - elif is_timedelta64_dtype(dtype): - dtype = np.dtype("timedelta64[ns]") - values = values.astype(dtype, copy=False) return values diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9830d22f3e2e5..cacf3da6f2749 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -30,7 +30,6 @@ OutOfBoundsTimedelta, Timedelta, Timestamp, - astype_overflowsafe, get_supported_reso, get_unit_from_dtype, is_supported_unit, @@ -52,7 +51,6 @@ from pandas.core.dtypes.astype import astype_nansafe from pandas.core.dtypes.common import ( - DT64NS_DTYPE, TD64NS_DTYPE, ensure_int8, ensure_int16, @@ -1433,23 +1431,6 @@ def maybe_cast_to_datetime( return cast(ArrayLike, value) -def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray: - """ - Safely convert non-nanosecond datetime64 or timedelta64 values to nanosecond. - """ - dtype = values.dtype - if dtype.kind == "M" and dtype != DT64NS_DTYPE: - values = astype_overflowsafe(values, dtype=DT64NS_DTYPE) - - elif dtype.kind == "m" and dtype != TD64NS_DTYPE: - values = astype_overflowsafe(values, dtype=TD64NS_DTYPE) - - elif copy: - values = values.copy() - - return values - - def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj: """ Convert dtypes with granularity less than nanosecond to nanosecond diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b6891dac9034b..fdfd263bfeee4 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -345,9 +345,7 @@ def test_datetime64_factorize(self, writable): data = np.array([np.datetime64("2020-01-01T00:00:00.000")]) data.setflags(write=writable) expected_codes = np.array([0], dtype=np.intp) - expected_uniques = np.array( - ["2020-01-01T00:00:00.000000000"], dtype="datetime64[ns]" - ) + expected_uniques = np.array(["2020-01-01T00:00:00.000"], dtype="datetime64[ms]") codes, uniques = pd.factorize(data) 
tm.assert_numpy_array_equal(codes, expected_codes) @@ -616,13 +614,13 @@ def test_datetime64_dtype_array_returned(self): def test_datetime_non_ns(self): a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]") result = pd.unique(a) - expected = np.array(["2000", "2001"], dtype="datetime64[ns]") + expected = a[1:] tm.assert_numpy_array_equal(result, expected) def test_timedelta_non_ns(self): a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]") result = pd.unique(a) - expected = np.array([2000000000000, 2001000000000], dtype="timedelta64[ns]") + expected = a[1:] tm.assert_numpy_array_equal(result, expected) def test_timedelta64_dtype_array_returned(self):