diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fbc3c4fe4ce92..ff4aa9968f294 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1159,6 +1159,7 @@ Other API Changes - :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). - :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` respectively ``Index.tolist`` (:issue:`8826`) - The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` no longer ignore the dtype precision. Passing a dtype with a non-nanosecond resolution, such as ``datetime64[us]``, now raises a ``ValueError`` (:issue:`24753`) .. _whatsnew_0240.api.extension: @@ -1259,6 +1260,7 @@ Deprecations - :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) - Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) - ``Series.cat.categorical``, ``Series.cat.name`` and ``Sersies.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). +- Passing a dtype without a precision, such as ``np.dtype('datetime64')`` or ``np.dtype('timedelta64')``, to :class:`Index`, :class:`DatetimeIndex` or :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype (``datetime64[ns]`` or ``timedelta64[ns]``) instead (:issue:`24753`). ..
_whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a2d67efbecbba..f2aeb1c1309de 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from datetime import datetime, time, timedelta +import textwrap import warnings import numpy as np @@ -1986,6 +1987,15 @@ def _validate_dt64_dtype(dtype): """ if dtype is not None: dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("M8")): + # no precision, warn + dtype = _NS_DTYPE + msg = textwrap.dedent("""\ + Passing in 'datetime64' dtype with no precision is deprecated + and will raise in a future version. Please pass in + 'datetime64[ns]' instead.""") + warnings.warn(msg, FutureWarning, stacklevel=5) + if ((isinstance(dtype, np.dtype) and dtype != _NS_DTYPE) or not isinstance(dtype, (np.dtype, DatetimeTZDtype))): raise ValueError("Unexpected value for 'dtype': '{dtype}'. " diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index be1a7097b0e0d..910cb96a86216 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -2,6 +2,7 @@ from __future__ import division from datetime import timedelta +import textwrap import warnings import numpy as np @@ -15,8 +16,8 @@ from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( - _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, + _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_dtype_equal, + is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -160,16 +161,8 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): # nanosecond UTC (or tz-naive) unix timestamps values = 
values.view(_TD_DTYPE) - if values.dtype != _TD_DTYPE: - raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) - - try: - dtype_mismatch = dtype != _TD_DTYPE - except TypeError: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) - else: - if dtype_mismatch: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) if freq == "infer": msg = ( @@ -204,9 +197,9 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): @classmethod def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, freq=None, unit=None): - if dtype != _TD_DTYPE: - raise ValueError("Only timedelta64[ns] dtype is valid.") - + # explicit None check; truthiness of np.dtype is unreliable + if dtype is not None: + _validate_td64_dtype(dtype) freq, freq_infer = dtl.maybe_infer_freq(freq) data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) @@ -997,6 +990,29 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): return result.view('timedelta64[ns]') +def _validate_td64_dtype(dtype): + """ + Validate that ``dtype`` is ``timedelta64[ns]``. + + A ``timedelta64`` dtype without a precision is deprecated: it emits a + FutureWarning and is treated as ``timedelta64[ns]``. Any other dtype + raises a ValueError. Returns the validated dtype. + """ + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("timedelta64")): + dtype = _TD_DTYPE + msg = textwrap.dedent("""\ + Passing in 'timedelta64' dtype with no precision is deprecated + and will raise in a future version.
Please pass in + 'timedelta64[ns]' instead.""") + warnings.warn(msg, FutureWarning, stacklevel=4) + + if not is_dtype_equal(dtype, _TD_DTYPE): + raise ValueError(_BAD_DTYPE.format(dtype=dtype)) + + return dtype + + def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 93091f5125b7c..767da81c5c43a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -311,10 +311,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_timedelta64_dtype(data) or (dtype is not None and is_timedelta64_dtype(dtype))): from pandas import TimedeltaIndex - result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pytimedelta(), dtype=_o_dtype) + if dtype is not None and is_dtype_equal(_o_dtype, dtype): + # Note we can pass copy=False because the .astype below + # will always make a copy + result = TimedeltaIndex(data, copy=False, name=name, **kwargs) + return result.astype(object) else: + result = TimedeltaIndex(data, copy=copy, name=name, + dtype=dtype, **kwargs) return result elif is_period_dtype(data) and not is_object_dtype(dtype): diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index a8745f78392ca..6b4662ca02e80 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -23,18 +23,18 @@ def test_non_array_raises(self): TimedeltaArray([1, 2, 3]) def test_other_type_raises(self): - with pytest.raises(TypeError, + with pytest.raises(ValueError, match="dtype bool cannot be converted"): TimedeltaArray(np.array([1, 2, 3], dtype='bool')) def test_incorrect_dtype_raises(self): # TODO: why TypeError for 'category' but ValueError for i8?
- with pytest.raises(TypeError, + with pytest.raises(ValueError, match=r'category cannot be converted ' r'to timedelta64\[ns\]'): TimedeltaArray(np.array([1, 2, 3], dtype='i8'), dtype='category') - with pytest.raises(TypeError, + with pytest.raises(ValueError, match=r"dtype int64 cannot be converted " r"to timedelta64\[ns\]"): TimedeltaArray(np.array([1, 2, 3], dtype='i8'), @@ -52,7 +52,7 @@ def test_copy(self): class TestTimedeltaArray(object): def test_from_sequence_dtype(self): - msg = r"Only timedelta64\[ns\] dtype is valid" + msg = "dtype .*object.* cannot be converted to timedelta64" with pytest.raises(ValueError, match=msg): TimedeltaArray._from_sequence([], dtype=object) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 7c9ca9da89d53..7ebebbf6dee28 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -634,6 +634,23 @@ def test_construction_with_nat_and_tzlocal(self): expected = DatetimeIndex([Timestamp('2018', tz=tz), pd.NaT]) tm.assert_index_equal(result, expected) + def test_constructor_no_precision_warns(self): + # GH-24753, GH-24739 + expected = pd.DatetimeIndex(['2000'], dtype='datetime64[ns]') + + # the warning's stacklevel is tuned for a direct DatetimeIndex call + with tm.assert_produces_warning(FutureWarning): + result = pd.DatetimeIndex(['2000'], dtype='datetime64') + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = pd.Index(['2000'], dtype='datetime64') + tm.assert_index_equal(result, expected) + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.DatetimeIndex(['2000'], dtype='datetime64[us]') + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 76f79e86e6f11..3938d6acad2f0 100644 ---
a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -180,3 +180,20 @@ def test_constructor_name(self): # GH10025 idx2 = TimedeltaIndex(idx, name='something else') assert idx2.name == 'something else' + + def test_constructor_no_precision_warns(self): + # GH-24753, GH-24739 + expected = pd.TimedeltaIndex(['2000'], dtype='timedelta64[ns]') + + # we set the stacklevel for TimedeltaIndex + with tm.assert_produces_warning(FutureWarning): + result = pd.TimedeltaIndex(['2000'], dtype='timedelta64') + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = pd.Index(['2000'], dtype='timedelta64') + tm.assert_index_equal(result, expected) + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.TimedeltaIndex(['2000'], dtype='timedelta64[us]')