Skip to content

Commit dd24e76

Browse files
jbrockmendel authored and
jreback committed
Fix arithmetic errors with timedelta64 dtypes (#22390)
1 parent 2be2ba5 commit dd24e76

File tree

6 files changed

+128
-72
lines changed

6 files changed

+128
-72
lines changed

doc/source/whatsnew/v0.24.0.txt

+5-3
Original file line numberDiff line numberDiff line change
@@ -581,12 +581,14 @@ Datetimelike
581581
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
582582
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
583583
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
584-
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
585-
-
586584

587585
Timedelta
588586
^^^^^^^^^
589-
587+
- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
588+
- Bug in adding an :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
589+
- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
590+
- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
591+
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
590592
-
591593
-
592594
-

pandas/core/indexes/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,14 @@ def index_arithmetic_method(self, other):
122122
elif isinstance(other, ABCTimedeltaIndex):
123123
# Defer to subclass implementation
124124
return NotImplemented
125+
elif isinstance(other, np.ndarray) and is_timedelta64_dtype(other):
126+
# GH#22390; wrap in Series for op, this will in turn wrap in
127+
# TimedeltaIndex, but will correctly raise TypeError instead of
128+
# NullFrequencyError for add/sub ops
129+
from pandas import Series
130+
other = Series(other)
131+
out = op(self, other)
132+
return Index(out, name=self.name)
125133

126134
other = self._validate_for_numeric_binop(other, op)
127135

@@ -2689,6 +2697,8 @@ def argsort(self, *args, **kwargs):
26892697
return result.argsort(*args, **kwargs)
26902698

26912699
def __add__(self, other):
2700+
if isinstance(other, (ABCSeries, ABCDataFrame)):
2701+
return NotImplemented
26922702
return Index(np.array(self) + other)
26932703

26942704
def __radd__(self, other):

pandas/core/indexes/range.py

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas.core.dtypes.common import (
99
is_integer,
1010
is_scalar,
11+
is_timedelta64_dtype,
1112
is_int64_dtype)
1213
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
1314

@@ -596,6 +597,9 @@ def _evaluate_numeric_binop(self, other):
596597
# GH#19333 is_integer evaluated True on timedelta64,
597598
# so we need to catch these explicitly
598599
return op(self._int64index, other)
600+
elif is_timedelta64_dtype(other):
601+
# Must be an np.ndarray; GH#22390
602+
return op(self._int64index, other)
599603

600604
other = self._validate_for_numeric_binop(other, op)
601605
attrs = self._get_attributes_dict()

pandas/core/ops.py

+42
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,37 @@ def _maybe_match_name(a, b):
107107
return None
108108

109109

110+
def maybe_upcast_for_op(obj):
111+
"""
112+
Cast non-pandas objects to pandas types to unify behavior of arithmetic
113+
and comparison operations.
114+
115+
Parameters
116+
----------
117+
obj: object
118+
119+
Returns
120+
-------
121+
out : object
122+
123+
Notes
124+
-----
125+
Be careful to call this *after* determining the `name` attribute to be
126+
attached to the result of the arithmetic operation.
127+
"""
128+
if type(obj) is datetime.timedelta:
129+
# GH#22390 cast up to Timedelta to rely on Timedelta
130+
# implementation; otherwise operation against numeric-dtype
131+
# raises TypeError
132+
return pd.Timedelta(obj)
133+
elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
134+
# GH#22390 Unfortunately we need to special-case right-hand
135+
# timedelta64 dtypes because numpy casts integer dtypes to
136+
# timedelta64 when operating with timedelta64
137+
return pd.TimedeltaIndex(obj)
138+
return obj
139+
140+
110141
# -----------------------------------------------------------------------------
111142
# Reversed Operations not available in the stdlib operator module.
112143
# Defining these instead of using lambdas allows us to reference them by name.
@@ -1222,6 +1253,7 @@ def wrapper(left, right):
12221253

12231254
left, right = _align_method_SERIES(left, right)
12241255
res_name = get_op_result_name(left, right)
1256+
right = maybe_upcast_for_op(right)
12251257

12261258
if is_categorical_dtype(left):
12271259
raise TypeError("{typ} cannot perform the operation "
@@ -1244,6 +1276,16 @@ def wrapper(left, right):
12441276
index=left.index, name=res_name,
12451277
dtype=result.dtype)
12461278

1279+
elif is_timedelta64_dtype(right) and not is_scalar(right):
1280+
# i.e. exclude np.timedelta64 object
1281+
# Note: we cannot use dispatch_to_index_op because
1282+
# that may incorrectly raise TypeError when we
1283+
# should get NullFrequencyError
1284+
result = op(pd.Index(left), right)
1285+
return construct_result(left, result,
1286+
index=left.index, name=res_name,
1287+
dtype=result.dtype)
1288+
12471289
lvalues = left.values
12481290
rvalues = right
12491291
if isinstance(rvalues, ABCSeries):

pandas/tests/arithmetic/test_numeric.py

+55-13
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
33
# behave identically.
44
# Specifically for numeric dtypes
5-
from datetime import timedelta
65
from decimal import Decimal
76
import operator
87
from collections import Iterable
@@ -47,7 +46,61 @@ def test_operator_series_comparison_zerorank(self):
4746
# ------------------------------------------------------------------
4847
# Numeric dtypes Arithmetic with Timedelta Scalar
4948

50-
class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
49+
class TestNumericArraylikeArithmeticWithTimedeltaLike(object):
50+
51+
# TODO: also check name retention
52+
@pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
53+
@pytest.mark.parametrize('left', [
54+
pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
55+
for dtype in ['i1', 'i2', 'i4', 'i8',
56+
'u1', 'u2', 'u4', 'u8',
57+
'f2', 'f4', 'f8']
58+
for cls in [pd.Series, pd.Index]],
59+
ids=lambda x: type(x).__name__ + str(x.dtype))
60+
def test_mul_td64arr(self, left, box_cls):
61+
# GH#22390
62+
right = np.array([1, 2, 3], dtype='m8[s]')
63+
right = box_cls(right)
64+
65+
expected = pd.TimedeltaIndex(['10s', '40s', '90s'])
66+
if isinstance(left, pd.Series) or box_cls is pd.Series:
67+
expected = pd.Series(expected)
68+
69+
result = left * right
70+
tm.assert_equal(result, expected)
71+
72+
result = right * left
73+
tm.assert_equal(result, expected)
74+
75+
# TODO: also check name retention
76+
@pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
77+
@pytest.mark.parametrize('left', [
78+
pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
79+
for dtype in ['i1', 'i2', 'i4', 'i8',
80+
'u1', 'u2', 'u4', 'u8',
81+
'f2', 'f4', 'f8']
82+
for cls in [pd.Series, pd.Index]],
83+
ids=lambda x: type(x).__name__ + str(x.dtype))
84+
def test_div_td64arr(self, left, box_cls):
85+
# GH#22390
86+
right = np.array([10, 40, 90], dtype='m8[s]')
87+
right = box_cls(right)
88+
89+
expected = pd.TimedeltaIndex(['1s', '2s', '3s'])
90+
if isinstance(left, pd.Series) or box_cls is pd.Series:
91+
expected = pd.Series(expected)
92+
93+
result = right / left
94+
tm.assert_equal(result, expected)
95+
96+
result = right // left
97+
tm.assert_equal(result, expected)
98+
99+
with pytest.raises(TypeError):
100+
left / right
101+
102+
with pytest.raises(TypeError):
103+
left // right
51104

52105
# TODO: de-duplicate with test_numeric_arr_mul_tdscalar
53106
def test_ops_series(self):
@@ -71,11 +124,6 @@ def test_ops_series(self):
71124
ids=lambda x: type(x).__name__)
72125
def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
73126
# GH#19333
74-
75-
if (box in [Series, pd.DataFrame] and
76-
type(scalar_td) is timedelta and index.dtype == 'f8'):
77-
raise pytest.xfail(reason="Cannot multiply timedelta by float")
78-
79127
expected = pd.timedelta_range('1 days', '10 days')
80128

81129
index = tm.box_expected(index, box)
@@ -99,12 +147,6 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
99147
Timedelta(days=1).to_pytimedelta()],
100148
ids=lambda x: type(x).__name__)
101149
def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box):
102-
103-
if box is Series and type(scalar_td) is timedelta:
104-
raise pytest.xfail(reason="TODO: Figure out why this case fails")
105-
if box is pd.DataFrame and isinstance(scalar_td, timedelta):
106-
raise pytest.xfail(reason="TODO: Figure out why this case fails")
107-
108150
expected = TimedeltaIndex(['1 Day', '12 Hours'])
109151

110152
index = tm.box_expected(index, box)

pandas/tests/test_arithmetic.py

+12-56
Original file line numberDiff line numberDiff line change
@@ -573,19 +573,8 @@ def test_td64arr_add_int_series_invalid(self, box, tdser):
573573
with pytest.raises(err):
574574
tdser + Series([2, 3, 4])
575575

576-
@pytest.mark.parametrize('box', [
577-
pd.Index,
578-
pytest.param(Series,
579-
marks=pytest.mark.xfail(reason="GH#19123 integer "
580-
"interpreted as "
581-
"nanoseconds",
582-
strict=True)),
583-
pytest.param(pd.DataFrame,
584-
marks=pytest.mark.xfail(reason="Attempts to broadcast "
585-
"incorrectly",
586-
strict=True, raises=ValueError))
587-
], ids=lambda x: x.__name__)
588-
def test_td64arr_radd_int_series_invalid(self, box, tdser):
576+
def test_td64arr_radd_int_series_invalid(self, box_df_fail, tdser):
577+
box = box_df_fail # Tries to broadcast incorrectly
589578
tdser = tm.box_expected(tdser, box)
590579
err = TypeError if box is not pd.Index else NullFrequencyError
591580
with pytest.raises(err):
@@ -605,11 +594,11 @@ def test_td64arr_sub_int_series_invalid(self, box, tdser):
605594
with pytest.raises(err):
606595
tdser - Series([2, 3, 4])
607596

608-
@pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds',
609-
strict=True)
610-
def test_td64arr_rsub_int_series_invalid(self, box, tdser):
597+
def test_td64arr_rsub_int_series_invalid(self, box_df_fail, tdser):
598+
box = box_df_fail # Tries to broadcast incorrectly
611599
tdser = tm.box_expected(tdser, box)
612-
with pytest.raises(TypeError):
600+
err = TypeError if box is not pd.Index else NullFrequencyError
601+
with pytest.raises(err):
613602
Series([2, 3, 4]) - tdser
614603

615604
@pytest.mark.parametrize('box', [
@@ -671,14 +660,6 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
671660
with pytest.raises(err):
672661
scalar - tdser
673662

674-
@pytest.mark.parametrize('box', [
675-
pd.Index,
676-
Series,
677-
pytest.param(pd.DataFrame,
678-
marks=pytest.mark.xfail(reason="Tries to broadcast "
679-
"incorrectly",
680-
strict=True, raises=ValueError))
681-
], ids=lambda x: x.__name__)
682663
@pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
683664
'uint64', 'uint32', 'uint16', 'uint8',
684665
'float64', 'float32', 'float16'])
@@ -688,10 +669,9 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
688669
Series([1, 2, 3])
689670
# TODO: Add DataFrame in here?
690671
], ids=lambda x: type(x).__name__)
691-
def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser):
692-
if type(vec) is Series and not dtype.startswith('float'):
693-
pytest.xfail(reason='GH#19123 integer interpreted as nanos')
694-
672+
def test_td64arr_add_sub_numeric_arr_invalid(self, box_df_fail, vec,
673+
dtype, tdser):
674+
box = box_df_fail # tries to broadcast incorrectly
695675
tdser = tm.box_expected(tdser, box)
696676
err = TypeError
697677
if box is pd.Index and not dtype.startswith('float'):
@@ -865,9 +845,6 @@ def test_td64arr_sub_NaT(self, box):
865845

866846
def test_td64arr_add_timedeltalike(self, delta, box):
867847
# only test adding/sub offsets as + is now numeric
868-
if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
869-
pytest.xfail(reason="Returns object dtype instead of m8[ns]")
870-
871848
rng = timedelta_range('1 days', '10 days')
872849
expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
873850
freq='D')
@@ -879,9 +856,6 @@ def test_td64arr_add_timedeltalike(self, delta, box):
879856

880857
def test_td64arr_sub_timedeltalike(self, delta, box):
881858
# only test adding/sub offsets as - is now numeric
882-
if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
883-
pytest.xfail(reason="Returns object dtype instead of m8[ns]")
884-
885859
rng = timedelta_range('1 days', '10 days')
886860
expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00')
887861

@@ -929,11 +903,7 @@ def test_timedelta64_operations_with_DateOffset(self):
929903

930904
@pytest.mark.parametrize('box', [
931905
pd.Index,
932-
pytest.param(Series,
933-
marks=pytest.mark.xfail(reason="Index fails to return "
934-
"NotImplemented on "
935-
"reverse op",
936-
strict=True)),
906+
Series,
937907
pytest.param(pd.DataFrame,
938908
marks=pytest.mark.xfail(reason="Tries to broadcast "
939909
"incorrectly",
@@ -1021,23 +991,12 @@ def test_td64arr_sub_offset_array(self, box_df_fail):
1021991
res = tdi - other
1022992
tm.assert_equal(res, expected)
1023993

1024-
@pytest.mark.parametrize('box', [
1025-
pd.Index,
1026-
pytest.param(Series,
1027-
marks=pytest.mark.xfail(reason="object dtype Series "
1028-
"fails to return "
1029-
"NotImplemented",
1030-
strict=True, raises=TypeError)),
1031-
pytest.param(pd.DataFrame,
1032-
marks=pytest.mark.xfail(reason="tries to broadcast "
1033-
"incorrectly",
1034-
strict=True, raises=ValueError))
1035-
], ids=lambda x: x.__name__)
1036994
@pytest.mark.parametrize('names', [(None, None, None),
1037995
('foo', 'bar', None),
1038996
('foo', 'foo', 'foo')])
1039-
def test_td64arr_with_offset_series(self, names, box):
997+
def test_td64arr_with_offset_series(self, names, box_df_fail):
1040998
# GH#18849
999+
box = box_df_fail # tries to broadcast incorrectly
10411000
box2 = Series if box is pd.Index else box
10421001

10431002
tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'],
@@ -1132,9 +1091,6 @@ def test_td64arr_mul_int(self, box):
11321091
tm.assert_equal(result, idx)
11331092

11341093
def test_td64arr_mul_tdlike_scalar_raises(self, delta, box):
1135-
if box is pd.DataFrame and not isinstance(delta, pd.DateOffset):
1136-
pytest.xfail(reason="returns m8[ns] instead of raising")
1137-
11381094
rng = timedelta_range('1 days', '10 days', name='foo')
11391095
rng = tm.box_expected(rng, box)
11401096
with pytest.raises(TypeError):

0 commit comments

Comments
 (0)