Fix arithmetic errors with timedelta64 dtypes (#22390)

jbrockmendel · jreback · commit dd24e761ea49 · 2018-08-23T06:37:06.000-04:00
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -581,12 +581,14 @@ Datetimelike
 - Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
 - Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
 - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
-- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
--
 
 Timedelta
 ^^^^^^^^^
-
+- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
+- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
+- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
+- Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
+- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
 -
 -
 -
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -122,6 +122,14 @@ def index_arithmetic_method(self, other):
         elif isinstance(other, ABCTimedeltaIndex):
             # Defer to subclass implementation
             return NotImplemented
+        elif isinstance(other, np.ndarray) and is_timedelta64_dtype(other):
+            # GH#22390; wrap in Series for op, this will in turn wrap in
+            # TimedeltaIndex, but will correctly raise TypeError instead of
+            # NullFrequencyError for add/sub ops
+            from pandas import Series
+            other = Series(other)
+            out = op(self, other)
+            return Index(out, name=self.name)
 
         other = self._validate_for_numeric_binop(other, op)
 
@@ -2689,6 +2697,8 @@ def argsort(self, *args, **kwargs):
         return result.argsort(*args, **kwargs)
 
     def __add__(self, other):
+        if isinstance(other, (ABCSeries, ABCDataFrame)):
+            return NotImplemented
         return Index(np.array(self) + other)
 
     def __radd__(self, other):
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -8,6 +8,7 @@
 from pandas.core.dtypes.common import (
     is_integer,
     is_scalar,
+    is_timedelta64_dtype,
     is_int64_dtype)
 from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
 
@@ -596,6 +597,9 @@ def _evaluate_numeric_binop(self, other):
                     # GH#19333 is_integer evaluated True on timedelta64,
                     # so we need to catch these explicitly
                     return op(self._int64index, other)
+                elif is_timedelta64_dtype(other):
+                    # Must be an np.ndarray; GH#22390
+                    return op(self._int64index, other)
 
                 other = self._validate_for_numeric_binop(other, op)
                 attrs = self._get_attributes_dict()
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -107,6 +107,37 @@ def _maybe_match_name(a, b):
     return None
 
 
+def maybe_upcast_for_op(obj):
+    """
+    Cast non-pandas objects to pandas types to unify behavior of arithmetic
+    and comparison operations.
+
+    Parameters
+    ----------
+    obj: object
+
+    Returns
+    -------
+    out : object
+
+    Notes
+    -----
+    Be careful to call this *after* determining the `name` attribute to be
+    attached to the result of the arithmetic operation.
+    """
+    if type(obj) is datetime.timedelta:
+        # GH#22390  cast up to Timedelta to rely on Timedelta
+        # implementation; otherwise operation against numeric-dtype
+        # raises TypeError
+        return pd.Timedelta(obj)
+    elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj):
+        # GH#22390 Unfortunately we need to special-case right-hand
+        # timedelta64 dtypes because numpy casts integer dtypes to
+        # timedelta64 when operating with timedelta64
+        return pd.TimedeltaIndex(obj)
+    return obj
+
+
 # -----------------------------------------------------------------------------
 # Reversed Operations not available in the stdlib operator module.
 # Defining these instead of using lambdas allows us to reference them by name.
@@ -1222,6 +1253,7 @@ def wrapper(left, right):
 
         left, right = _align_method_SERIES(left, right)
         res_name = get_op_result_name(left, right)
+        right = maybe_upcast_for_op(right)
 
         if is_categorical_dtype(left):
             raise TypeError("{typ} cannot perform the operation "
@@ -1244,6 +1276,16 @@ def wrapper(left, right):
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
+        elif is_timedelta64_dtype(right) and not is_scalar(right):
+            # i.e. exclude np.timedelta64 object
+            # Note: we cannot use dispatch_to_index_op because
+            # that may incorrectly raise TypeError when we
+            # should get NullFrequencyError
+            result = op(pd.Index(left), right)
+            return construct_result(left, result,
+                                    index=left.index, name=res_name,
+                                    dtype=result.dtype)
+
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -2,7 +2,6 @@
 # Arithmetc tests for DataFrame/Series/Index/Array classes that should
 # behave identically.
 # Specifically for numeric dtypes
-from datetime import timedelta
 from decimal import Decimal
 import operator
 from collections import Iterable
@@ -47,7 +46,61 @@ def test_operator_series_comparison_zerorank(self):
 # ------------------------------------------------------------------
 # Numeric dtypes Arithmetic with Timedelta Scalar
 
-class TestNumericArraylikeArithmeticWithTimedeltaScalar(object):
+class TestNumericArraylikeArithmeticWithTimedeltaLike(object):
+
+    # TODO: also check name retentention
+    @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
+    @pytest.mark.parametrize('left', [
+        pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
+                                      for dtype in ['i1', 'i2', 'i4', 'i8',
+                                                    'u1', 'u2', 'u4', 'u8',
+                                                    'f2', 'f4', 'f8']
+                                      for cls in [pd.Series, pd.Index]],
+        ids=lambda x: type(x).__name__ + str(x.dtype))
+    def test_mul_td64arr(self, left, box_cls):
+        # GH#22390
+        right = np.array([1, 2, 3], dtype='m8[s]')
+        right = box_cls(right)
+
+        expected = pd.TimedeltaIndex(['10s', '40s', '90s'])
+        if isinstance(left, pd.Series) or box_cls is pd.Series:
+            expected = pd.Series(expected)
+
+        result = left * right
+        tm.assert_equal(result, expected)
+
+        result = right * left
+        tm.assert_equal(result, expected)
+
+    # TODO: also check name retentention
+    @pytest.mark.parametrize('box_cls', [np.array, pd.Index, pd.Series])
+    @pytest.mark.parametrize('left', [
+        pd.RangeIndex(10, 40, 10)] + [cls([10, 20, 30], dtype=dtype)
+                                      for dtype in ['i1', 'i2', 'i4', 'i8',
+                                                    'u1', 'u2', 'u4', 'u8',
+                                                    'f2', 'f4', 'f8']
+                                      for cls in [pd.Series, pd.Index]],
+        ids=lambda x: type(x).__name__ + str(x.dtype))
+    def test_div_td64arr(self, left, box_cls):
+        # GH#22390
+        right = np.array([10, 40, 90], dtype='m8[s]')
+        right = box_cls(right)
+
+        expected = pd.TimedeltaIndex(['1s', '2s', '3s'])
+        if isinstance(left, pd.Series) or box_cls is pd.Series:
+            expected = pd.Series(expected)
+
+        result = right / left
+        tm.assert_equal(result, expected)
+
+        result = right // left
+        tm.assert_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            left / right
+
+        with pytest.raises(TypeError):
+            left // right
 
     # TODO: de-duplicate with test_numeric_arr_mul_tdscalar
     def test_ops_series(self):
@@ -71,11 +124,6 @@ def test_ops_series(self):
         ids=lambda x: type(x).__name__)
     def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
         # GH#19333
-
-        if (box in [Series, pd.DataFrame] and
-                type(scalar_td) is timedelta and index.dtype == 'f8'):
-            raise pytest.xfail(reason="Cannot multiply timedelta by float")
-
         expected = pd.timedelta_range('1 days', '10 days')
 
         index = tm.box_expected(index, box)
@@ -99,12 +147,6 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
         Timedelta(days=1).to_pytimedelta()],
         ids=lambda x: type(x).__name__)
     def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box):
-
-        if box is Series and type(scalar_td) is timedelta:
-            raise pytest.xfail(reason="TODO: Figure out why this case fails")
-        if box is pd.DataFrame and isinstance(scalar_td, timedelta):
-            raise pytest.xfail(reason="TODO: Figure out why this case fails")
-
         expected = TimedeltaIndex(['1 Day', '12 Hours'])
 
         index = tm.box_expected(index, box)
diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py
@@ -573,19 +573,8 @@ def test_td64arr_add_int_series_invalid(self, box, tdser):
         with pytest.raises(err):
             tdser + Series([2, 3, 4])
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        pytest.param(Series,
-                     marks=pytest.mark.xfail(reason="GH#19123 integer "
-                                                    "interpreted as "
-                                                    "nanoseconds",
-                                             strict=True)),
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Attempts to broadcast "
-                                                    "incorrectly",
-                                             strict=True, raises=ValueError))
-    ], ids=lambda x: x.__name__)
-    def test_td64arr_radd_int_series_invalid(self, box, tdser):
+    def test_td64arr_radd_int_series_invalid(self, box_df_fail, tdser):
+        box = box_df_fail  # Tries to broadcast incorrectly
         tdser = tm.box_expected(tdser, box)
         err = TypeError if box is not pd.Index else NullFrequencyError
         with pytest.raises(err):
@@ -605,11 +594,11 @@ def test_td64arr_sub_int_series_invalid(self, box, tdser):
         with pytest.raises(err):
             tdser - Series([2, 3, 4])
 
-    @pytest.mark.xfail(reason='GH#19123 integer interpreted as nanoseconds',
-                       strict=True)
-    def test_td64arr_rsub_int_series_invalid(self, box, tdser):
+    def test_td64arr_rsub_int_series_invalid(self, box_df_fail, tdser):
+        box = box_df_fail  # Tries to broadcast incorrectly
         tdser = tm.box_expected(tdser, box)
-        with pytest.raises(TypeError):
+        err = TypeError if box is not pd.Index else NullFrequencyError
+        with pytest.raises(err):
             Series([2, 3, 4]) - tdser
 
     @pytest.mark.parametrize('box', [
@@ -671,14 +660,6 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
         with pytest.raises(err):
             scalar - tdser
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Tries to broadcast "
-                                                    "incorrectly",
-                                             strict=True, raises=ValueError))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16',
                                        'uint64', 'uint32', 'uint16', 'uint8',
                                        'float64', 'float32', 'float16'])
@@ -688,10 +669,9 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
         Series([1, 2, 3])
         # TODO: Add DataFrame in here?
     ], ids=lambda x: type(x).__name__)
-    def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser):
-        if type(vec) is Series and not dtype.startswith('float'):
-            pytest.xfail(reason='GH#19123 integer interpreted as nanos')
-
+    def test_td64arr_add_sub_numeric_arr_invalid(self, box_df_fail, vec,
+                                                 dtype, tdser):
+        box = box_df_fail  # tries to broadcast incorrectly
         tdser = tm.box_expected(tdser, box)
         err = TypeError
         if box is pd.Index and not dtype.startswith('float'):
@@ -865,9 +845,6 @@ def test_td64arr_sub_NaT(self, box):
 
     def test_td64arr_add_timedeltalike(self, delta, box):
         # only test adding/sub offsets as + is now numeric
-        if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
-            pytest.xfail(reason="Returns object dtype instead of m8[ns]")
-
         rng = timedelta_range('1 days', '10 days')
         expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00',
                                    freq='D')
@@ -879,9 +856,6 @@ def test_td64arr_add_timedeltalike(self, delta, box):
 
     def test_td64arr_sub_timedeltalike(self, delta, box):
         # only test adding/sub offsets as - is now numeric
-        if box is pd.DataFrame and isinstance(delta, pd.DateOffset):
-            pytest.xfail(reason="Returns object dtype instead of m8[ns]")
-
         rng = timedelta_range('1 days', '10 days')
         expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00')
 
@@ -929,11 +903,7 @@ def test_timedelta64_operations_with_DateOffset(self):
 
     @pytest.mark.parametrize('box', [
         pd.Index,
-        pytest.param(Series,
-                     marks=pytest.mark.xfail(reason="Index fails to return "
-                                                    "NotImplemented on "
-                                                    "reverse op",
-                                             strict=True)),
+        Series,
         pytest.param(pd.DataFrame,
                      marks=pytest.mark.xfail(reason="Tries to broadcast "
                                                     "incorrectly",
@@ -1021,23 +991,12 @@ def test_td64arr_sub_offset_array(self, box_df_fail):
             res = tdi - other
         tm.assert_equal(res, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        pytest.param(Series,
-                     marks=pytest.mark.xfail(reason="object dtype Series "
-                                                    "fails to return "
-                                                    "NotImplemented",
-                                             strict=True, raises=TypeError)),
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="tries to broadcast "
-                                                    "incorrectly",
-                                             strict=True, raises=ValueError))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('names', [(None, None, None),
                                        ('foo', 'bar', None),
                                        ('foo', 'foo', 'foo')])
-    def test_td64arr_with_offset_series(self, names, box):
+    def test_td64arr_with_offset_series(self, names, box_df_fail):
         # GH#18849
+        box = box_df_fail  # tries to broadcast incorrectly
         box2 = Series if box is pd.Index else box
 
         tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'],
@@ -1132,9 +1091,6 @@ def test_td64arr_mul_int(self, box):
         tm.assert_equal(result, idx)
 
     def test_td64arr_mul_tdlike_scalar_raises(self, delta, box):
-        if box is pd.DataFrame and not isinstance(delta, pd.DateOffset):
-            pytest.xfail(reason="returns m8[ns] instead of raising")
-
         rng = timedelta_range('1 days', '10 days', name='foo')
         rng = tm.box_expected(rng, box)
         with pytest.raises(TypeError):