Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ In cases with mixed-resolution inputs, the highest resolution is used:

.. warning:: Many users will now get "M8[us]" dtype data in cases when they used to get "M8[ns]". For most use cases they should not notice a difference. One big exception is converting to integers, which will give integers 1000x smaller.

Similarly, the :class:`Timedelta` constructor and :func:`to_timedelta` with a string input now default to a microsecond unit, using a nanosecond unit only in cases that actually have nanosecond precision.

.. _whatsnew_300.api_breaking.concat_datetime_sorting:

:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
Expand Down
48 changes: 41 additions & 7 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
import re
import warnings

from pandas.util._decorators import set_module
Expand Down Expand Up @@ -448,11 +449,19 @@ def array_to_timedelta64(
ival = parse_iso_format_string(item)
else:
ival = parse_timedelta_string(item)
if (
(infer_reso or creso == NPY_DATETIMEUNIT.NPY_FR_us)
and not needs_nano_unit(ival, item)
):
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
ival = ival // 1000
else:
item_reso = NPY_FR_ns

item_reso = NPY_FR_ns
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
if ival != NPY_NAT:
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

elif is_tick_object(item):
item_reso = get_supported_reso(item._creso)
Expand Down Expand Up @@ -722,6 +731,24 @@ cdef timedelta_from_spec(object number, object frac, object unit):
return cast_from_unit(float(n), unit)


cdef bint needs_nano_unit(int64_t ival, str item):
    """
    Decide whether a parsed timedelta string has to keep nanosecond unit.

    Parameters
    ----------
    ival : int64_t
        The value parsed from ``item``, expressed in nanoseconds.
    item : str
        The string that was parsed.

    Returns
    -------
    bint
        True if nanosecond unit is required, False if microsecond unit can
        be used instead.

    Notes
    -----
    Nanosecond unit is required when any of the following holds:

    - ``ival`` is not a whole number of microseconds, i.e. the parsed value
      has sub-microsecond content.
    - ``item`` contains a decimal point followed by seven or more digits,
      even if the extra digits are trailing zeros (e.g. "0.123456000 s");
      such input is treated as nano for consistency.
    - ``item`` explicitly mentions nanoseconds: it contains "ns", or
      contains "nano" in any letter case (e.g. "1000 ns").
    """
    # TODO: more performant way of doing this check?
    if ival % 1000 != 0:
        # A remainder below one microsecond cannot be represented in "us".
        return True
    if re.search(r"\.\d{7}", item):
        # More than six decimals were written out.
        return True
    if "ns" in item:
        return True
    return "nano" in item.lower()


cpdef inline str parse_timedelta_unit(str unit):
"""
Parameters
Expand Down Expand Up @@ -2121,10 +2148,17 @@ class Timedelta(_Timedelta):
if (len(value) > 0 and value[0] == "P") or (
len(value) > 1 and value[:2] == "-P"
):
value = parse_iso_format_string(value)
ival = parse_iso_format_string(value)
else:
ival = parse_timedelta_string(value)

if not needs_nano_unit(ival, value):
# If we don't specifically need nanosecond resolution, default
# to microsecond like we do for datetimes
value = np.timedelta64(ival // 1000, "us")
return cls(value)
else:
value = parse_timedelta_string(value)
value = np.timedelta64(value)
value = np.timedelta64(ival, "ns")
elif PyDelta_Check(value):
# pytimedelta object -> microsecond resolution
new_value = delta_to_nanoseconds(
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ def inferred_freq(self) -> str | None:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)
>>> tdelta_idx.inferred_freq
'10D'
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class TimedeltaArray(dtl.TimelikeOps):
>>> pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(["1h", "2h"]))
<TimedeltaArray>
['0 days 01:00:00', '0 days 02:00:00']
Length: 2, dtype: timedelta64[ns]
Length: 2, dtype: timedelta64[us]
"""

_typ = "timedeltaarray"
Expand Down Expand Up @@ -813,7 +813,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]:
>>> idx = pd.to_timedelta(np.arange(5), unit="D")
>>> idx
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)

>>> idx.total_seconds()
Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64')
Expand Down Expand Up @@ -892,7 +892,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)
>>> tdelta_idx.days
Index([0, 10, 20], dtype='int64')"""
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _astype_nansafe(
# bc we know arr.dtype == object, this is equivalent to
# `np.asarray(to_timedelta(arr))`, but using a lower-level API that
# does not require a circular import.
tdvals = array_to_timedelta64(arr).view("m8[ns]")
tdvals = array_to_timedelta64(arr)

tda = ensure_wrapped_if_datetimelike(tdvals)
return tda.astype(dtype, copy=False)._ndarray
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8657,7 +8657,8 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
if lib.is_np_dtype(rvalues.dtype, "mM"):
# i.e. DatetimeArray[tznaive] or TimedeltaArray
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,7 +1560,7 @@ def abs(self) -> Self:
>>> s = pd.Series([pd.Timedelta("1 days")])
>>> s.abs()
0 1 days
dtype: timedelta64[ns]
dtype: timedelta64[us]

Select rows with data closest to certain value using argsort (from
`StackOverflow <https://stackoverflow.com/a/17758115>`__).
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ class TimedeltaProperties(Properties):
0 0 days 00:00:01
1 0 days 00:00:02
2 0 days 00:00:03
dtype: timedelta64[ns]
dtype: timedelta64[us]
>>> seconds_series.dt.seconds
0 1
1 2
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ def inferred_freq(self) -> str | None:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)
>>> tdelta_idx.inferred_freq
'10D'
"""
Expand Down
35 changes: 22 additions & 13 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class TimedeltaIndex(DatetimeTimedeltaMixin):
--------
>>> pd.TimedeltaIndex(["0 days", "1 days", "2 days", "3 days", "4 days"])
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)

We can also let pandas infer the frequency when possible.

Expand Down Expand Up @@ -230,18 +230,27 @@ def get_loc(self, key):

return Index.get_loc(self, key)

# error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso"
# incompatible with return type "tuple[datetime, Resolution]" in supertype
# "DatetimeIndexOpsMixin"
def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # type: ignore[override]
# the "with_reso" is a no-op for TimedeltaIndex
# error: Return type "tuple[Timedelta | NaTType, Resolution]" of
# "_parse_with_reso" incompatible with return type
# "tuple[datetime, Resolution]" in supertype
# "pandas.core.indexes.datetimelike.DatetimeIndexOpsMixin"
def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, Resolution]: # type: ignore[override]
parsed = Timedelta(label)
return parsed, None
if isinstance(parsed, Timedelta):
reso = Resolution.get_reso_from_freqstr(parsed.unit)
else:
# i.e. pd.NaT
reso = Resolution.get_reso_from_freqstr("s")
return parsed, reso

def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
def _parsed_string_to_bounds(self, reso: Resolution, parsed: Timedelta):
# reso is unused, included to match signature of DTI/PI
lbound = parsed.round(parsed.resolution_string)
rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
rbound = (
lbound
+ to_offset(parsed.resolution_string)
- Timedelta(1, unit=self.unit).as_unit(self.unit)
)
return lbound, rbound

# -------------------------------------------------------------------
Expand Down Expand Up @@ -314,14 +323,14 @@ def timedelta_range(
--------
>>> pd.timedelta_range(start="1 day", periods=4)
TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
dtype='timedelta64[us]', freq='D')

The ``closed`` parameter specifies which endpoint is included. The default
behavior is to include both endpoints.

>>> pd.timedelta_range(start="1 day", periods=4, closed="right")
TimedeltaIndex(['2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
dtype='timedelta64[us]', freq='D')

The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
Only fixed frequencies can be passed, non-fixed frequencies such as
Expand All @@ -330,15 +339,15 @@ def timedelta_range(
>>> pd.timedelta_range(start="1 day", end="2 days", freq="6h")
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
'1 days 18:00:00', '2 days 00:00:00'],
dtype='timedelta64[ns]', freq='6h')
dtype='timedelta64[us]', freq='6h')

Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).

>>> pd.timedelta_range(start="1 day", end="5 days", periods=4)
TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
'5 days 00:00:00'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)

**Specify a unit**

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,9 @@ def nanstd(
>>> nanops.nanstd(s.values)
1.0
"""
if values.dtype == "M8[ns]":
values = values.view("m8[ns]")
if values.dtype.kind == "M":
unit = np.datetime_data(values.dtype)[0]
values = values.view(f"m8[{unit}]")

orig_dtype = values.dtype
values, mask = _get_values(values, skipna, mask=mask)
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# Avoid possible ambiguities with pd.NaT
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("datetime64[ns]")
# Use second resolution to ensure that the result of e.g.
# `left - np.datetime64("NaT")` retains the unit of left.unit
obj = obj.astype("datetime64[s]")
elif not is_supported_dtype(obj.dtype):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
Expand All @@ -563,7 +565,9 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# we broadcast and wrap in a TimedeltaArray
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("timedelta64[ns]")
# Use second resolution to ensure that the result of e.g.
# `left + np.timedelta64("NaT")` retains the unit of left.unit
obj = obj.astype("timedelta64[s]")
elif not is_supported_dtype(obj.dtype):
new_dtype = get_supported_dtype(obj.dtype)
obj = obj.astype(new_dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def to_timedelta(
dtype='timedelta64[ns]', freq=None)
>>> pd.to_timedelta(np.arange(5), unit="D")
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
dtype='timedelta64[us]', freq=None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
dtype='timedelta64[us]', freq=None)
dtype='timedelta64[ns]', freq=None)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i dont think thats accurate

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Woops, misread. Yep you're right

"""
if unit is not None:
unit = parse_timedelta_unit(unit)
Expand Down
16 changes: 11 additions & 5 deletions pandas/plotting/_matplotlib/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from matplotlib.axis import Axis

from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import TimeUnit


_mpl_units: dict = {} # Cache for units overwritten by us
Expand Down Expand Up @@ -1099,18 +1100,22 @@ class TimeSeries_TimedeltaFormatter(mpl.ticker.Formatter): # pyright: ignore[re
Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`.
"""

def __init__(self, unit: TimeUnit = "ns"):
# Remember the time unit of the values this formatter will be asked to
# render; `__call__` looks it up to pick the power-of-ten exponent
# ("ns" -> 9, "us" -> 6, "ms" -> 3, "s" -> 0). Defaults to "ns".
# format_dateaxis passes the plotted TimedeltaIndex's `index.unit` here.
self.unit = unit
# Let the base formatter class run its own initialisation.
super().__init__()

axis: Axis

@staticmethod
def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
def format_timedelta_ticks(x, pos, n_decimals: int, exp: int) -> str:
"""
Convert seconds to 'D days HH:MM:SS.F'
"""
s, ns = divmod(x, 10**9) # TODO(non-nano): this looks like it assumes ns
s, ns = divmod(x, 10**exp)
m, s = divmod(s, 60)
h, m = divmod(m, 60)
d, h = divmod(h, 24)
decimals = int(ns * 10 ** (n_decimals - 9))
decimals = int(ns * 10 ** (n_decimals - exp))
s = f"{int(h):02d}:{int(m):02d}:{int(s):02d}"
if n_decimals > 0:
s += f".{decimals:0{n_decimals}d}"
Expand All @@ -1119,6 +1124,7 @@ def format_timedelta_ticks(x, pos, n_decimals: int) -> str:
return s

def __call__(self, x, pos: int | None = 0) -> str:
exp = {"ns": 9, "us": 6, "ms": 3, "s": 0}[self.unit]
(vmin, vmax) = tuple(self.axis.get_view_interval())
n_decimals = min(int(np.ceil(np.log10(100 * 10**9 / abs(vmax - vmin)))), 9)
return self.format_timedelta_ticks(x, pos, n_decimals)
n_decimals = min(int(np.ceil(np.log10(100 * 10**exp / abs(vmax - vmin)))), exp)
return self.format_timedelta_ticks(x, pos, n_decimals, exp)
2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def format_dateaxis(
subplot.format_coord = functools.partial(_format_coord, freq)

elif isinstance(index, ABCTimedeltaIndex):
subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter())
subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter(index.unit))
else:
raise TypeError("index type not supported")

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ def test_apply_non_numpy_dtype():

result = df.apply(lambda x: x + pd.Timedelta("1day"))
expected = DataFrame(
{"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels", unit="ns")}
{"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")}
)
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_apply_box_td64():
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
ser = Series(vals)
assert ser.dtype == "timedelta64[ns]"
assert ser.dtype == "timedelta64[us]"
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
exp = Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
Expand Down
Loading
Loading