diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 77e8da6c44af2..a27199b58cf5e 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -5,6 +5,7 @@ import operator from datetime import datetime, timedelta import warnings +from itertools import product, starmap import numpy as np import pytest @@ -64,6 +65,32 @@ def test_tz_aware_scalar_comparison(self, timestamps): class TestDatetime64SeriesComparison(object): + # TODO: moved from tests.series.test_operators; needs cleanup + def test_comparison_invalid(self): + # GH#4968 + # invalid date/int comparisons + ser = Series(range(5)) + ser2 = Series(pd.date_range('20010101', periods=5)) + + for (x, y) in [(ser, ser2), (ser2, ser)]: + + result = x == y + expected = Series([False] * 5) + tm.assert_series_equal(result, expected) + + result = x != y + expected = Series([True] * 5) + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError): + x >= y + with pytest.raises(TypeError): + x > y + with pytest.raises(TypeError): + x < y + with pytest.raises(TypeError): + x <= y + @pytest.mark.parametrize('data', [ [Timestamp('2011-01-01'), NaT, Timestamp('2011-01-03')], [Timedelta('1 days'), NaT, Timedelta('3 days')], @@ -1360,7 +1387,95 @@ def test_dti_sub_pi(self, dti_freq, pi_freq, op, box): with pytest.raises(TypeError): op(dti, pi) - # ------------------------------------------------------------- + # ------------------------------------------------------------------- + # TODO: Most of this block is moved from series or frame tests, needs + # cleanup, box-parametrization, and de-duplication + + @pytest.mark.parametrize('op', [operator.add, operator.sub]) + def test_timedelta64_equal_timedelta_supported_ops(self, op): + ser = Series([Timestamp('20130301'), + Timestamp('20130228 23:00:00'), + Timestamp('20130228 22:00:00'), + Timestamp('20130228 21:00:00')]) + + intervals = ['D', 'h', 'm', 's', 'us'] + + # TODO: unused + # npy16_mappings = {'D': 24 * 60 * 60 * 1000000, + # 'h': 60 * 60 * 1000000, + # 'm': 60 * 1000000, + # 's': 1000000, + # 'us': 1} + + def timedelta64(*args): + return sum(starmap(np.timedelta64, zip(args, intervals))) + + for d, h, m, s, us in product(*([range(2)] * 5)): + nptd = timedelta64(d, h, m, s, us) + pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, + microseconds=us) + lhs = op(ser, nptd) + rhs = op(ser, pytd) + + tm.assert_series_equal(lhs, rhs) + + def test_ops_nat_mixed_datetime64_timedelta64(self): + # GH#11349 + timedelta_series = Series([NaT, Timedelta('1s')]) + datetime_series = Series([NaT, Timestamp('19900315')]) + nat_series_dtype_timedelta = Series([NaT, NaT], + dtype='timedelta64[ns]') + nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') + single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') + single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') + + # subtraction + tm.assert_series_equal(datetime_series - single_nat_dtype_datetime, + nat_series_dtype_timedelta) + + tm.assert_series_equal(datetime_series - single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + tm.assert_series_equal(-single_nat_dtype_timedelta + datetime_series, + nat_series_dtype_timestamp) + + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + tm.assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_datetime, + nat_series_dtype_timedelta) + + tm.assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + tm.assert_series_equal(-single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + with pytest.raises(TypeError): + timedelta_series - single_nat_dtype_datetime + + # addition + tm.assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + tm.assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + tm.assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, + nat_series_dtype_timestamp) + tm.assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, + nat_series_dtype_timestamp) + + tm.assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_datetime, + nat_series_dtype_timestamp) + tm.assert_series_equal(single_nat_dtype_datetime + + nat_series_dtype_timedelta, + nat_series_dtype_timestamp) def test_ufunc_coercions(self): idx = date_range('2011-01-01', periods=3, freq='2D', name='x') diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 44aaba2885bf7..71742d428ea3e 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -4,6 +4,8 @@ # Specifically for numeric dtypes from datetime import timedelta from decimal import Decimal +import operator +from collections import Iterable import pytest import numpy as np @@ -12,6 +14,7 @@ import pandas.util.testing as tm from pandas.compat import PY3 +from pandas.core import ops from pandas import Timedelta, Series, Index, TimedeltaIndex @@ -382,17 +385,12 @@ def test_div_int(self, idx): expected = Index(idx.values / 2) tm.assert_index_equal(result, expected) - def test_mul_int(self, idx): - result = idx * 1 - tm.assert_index_equal(result, idx) + @pytest.mark.parametrize('op', [operator.mul, ops.rmul, operator.floordiv]) + def test_mul_int_identity(self, op, idx, box): + idx = tm.box_expected(idx, box) - def test_rmul_int(self, idx): - result = 1 * idx - tm.assert_index_equal(result, idx) - - def test_floordiv_int(self, idx): - result = idx // 1 - tm.assert_index_equal(result, idx) + result = op(idx, 1) + tm.assert_equal(result, idx) def test_mul_int_array(self, idx): didx = idx * idx @@ -434,22 +432,26 @@ def test_mul_size_mismatch_raises(self, idx): with pytest.raises(ValueError): idx * np.array([1, 2]) - def test_pow_float(self, idx): + @pytest.mark.parametrize('op', [operator.pow, ops.rpow]) + def test_pow_float(self, op, idx, box): # test power calculations both ways, GH#14973 - expected = pd.Float64Index(idx.values**2.0) - result = idx**2.0 - tm.assert_index_equal(result, expected) + expected = pd.Float64Index(op(idx.values, 2.0)) - def test_rpow_float(self, idx): - # test power calculations both ways, GH#14973 - expected = pd.Float64Index(2.0**idx.values) - result = 2.0**idx - tm.assert_index_equal(result, expected) + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = op(idx, 2.0) + tm.assert_equal(result, expected) - def test_modulo(self, idx): + def test_modulo(self, idx, box): # GH#9244 expected = Index(idx.values % 2) - tm.assert_index_equal(idx % 2, expected) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = idx % 2 + tm.assert_equal(result, expected) def test_divmod(self, idx): result = divmod(idx, 2) @@ -479,56 +481,282 @@ def test_divmod_series(self, idx): for r, e in zip(result, expected): tm.assert_series_equal(r, e) + @pytest.mark.parametrize('other', [np.nan, 7, -23, 2.718, -3.14, np.inf]) + def test_ops_np_scalar(self, other): + vals = np.random.randn(5, 3) + f = lambda x: pd.DataFrame(x, index=list('ABCDE'), + columns=['jim', 'joe', 'jolie']) + + df = f(vals) + + tm.assert_frame_equal(df / np.array(other), f(vals / other)) + tm.assert_frame_equal(np.array(other) * df, f(vals * other)) + tm.assert_frame_equal(df + np.array(other), f(vals + other)) + tm.assert_frame_equal(np.array(other) - df, f(other - vals)) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = 'ts' + + df = pd.DataFrame({'A': ts}) + + tm.assert_series_equal(ts + ts, ts + df['A'], + check_names=False) + tm.assert_series_equal(ts ** ts, ts ** df['A'], + check_names=False) + tm.assert_series_equal(ts < ts, ts < df['A'], + check_names=False) + tm.assert_series_equal(ts / ts, ts / df['A'], + check_names=False) + class TestAdditionSubtraction(object): # __add__, __sub__, __radd__, __rsub__, __iadd__, __isub__ # for non-timestamp/timedelta/period dtypes - pass + + # TODO: This came from series.test.test_operators, needs cleanup + def test_arith_ops_df_compat(self): + # GH#1134 + s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') + + exp = pd.Series([3.0, 4.0, np.nan, np.nan], + index=list('ABCD'), name='x') + tm.assert_series_equal(s1 + s2, exp) + tm.assert_series_equal(s2 + s1, exp) + + exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, + index=list('ABCD')) + tm.assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) + tm.assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) + + # different length + s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') + s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') + + exp = pd.Series([3, 4, 5, np.nan], + index=list('ABCD'), name='x') + tm.assert_series_equal(s3 + s4, exp) + tm.assert_series_equal(s4 + s3, exp) + + exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, + index=list('ABCD')) + tm.assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) + tm.assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_series_frame_radd_bug(self): + # GH#353 + vals = pd.Series(tm.rands_array(5, 10)) + result = 'foo_' + vals + expected = vals.map(lambda x: 'foo_' + x) + tm.assert_series_equal(result, expected) + + frame = pd.DataFrame({'vals': vals}) + result = 'foo_' + frame + expected = pd.DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) + tm.assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = 'ts' + + # really raise this time + now = pd.Timestamp.now().to_pydatetime() + with pytest.raises(TypeError): + now + ts + + with pytest.raises(TypeError): + ts + now + + # TODO: This came from series.test.test_operators, needs cleanup + def test_datetime64_with_index(self): + # arithmetic integer ops with an index + ser = pd.Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + # GH#4629 + # arithmetic datetime64 ops with an index + ser = pd.Series(pd.date_range('20130101', periods=5), + index=pd.date_range('20130101', periods=5)) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError): + # GH#18850 + result = ser - ser.index.to_period() + + df = pd.DataFrame(np.random.randn(5, 2), + index=pd.date_range('20130101', periods=5)) + df['date'] = pd.Timestamp('20130102') + df['expected'] = df['date'] - df.index.to_series() + df['result'] = df['date'] - df.index + tm.assert_series_equal(df['result'], df['expected'], check_names=False) + + # TODO: taken from tests.frame.test_operators, needs cleanup + def test_frame_operators(self): + seriesd = tm.getSeriesData() + frame = pd.DataFrame(seriesd) + frame2 = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A']) + + garbage = np.random.random(4) + colSeries = pd.Series(garbage, index=np.array(frame.columns)) + + idSum = frame + frame + seriesSum = frame + colSeries + + for col, series in idSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] * 2 + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + for col, series in seriesSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] + colSeries[col] + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + added = frame2 + frame2 + expected = frame2 * 2 + tm.assert_frame_equal(added, expected) + + df = pd.DataFrame({'a': ['a', None, 'b']}) + tm.assert_frame_equal(df + df, + pd.DataFrame({'a': ['aa', np.nan, 'bb']})) + + # Test for issue #10181 + for dtype in ('float', 'int64'): + frames = [ + pd.DataFrame(dtype=dtype), + pd.DataFrame(columns=['A'], dtype=dtype), + pd.DataFrame(index=[0], dtype=dtype), + ] + for df in frames: + assert (df + df).equals(df) + tm.assert_frame_equal(df + df, df) + + # TODO: taken from tests.series.test_operators; needs cleanup + def test_series_operators(self): + def _check_op(series, other, op, pos_only=False, check_dtype=True): + left = np.abs(series) if pos_only else series + right = np.abs(other) if pos_only else other + + cython_or_numpy = op(left, right) + python = left.combine(right, op) + tm.assert_series_equal(cython_or_numpy, python, + check_dtype=check_dtype) + + def check(series, other): + simple_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod'] + + for opname in simple_ops: + _check_op(series, other, getattr(operator, opname)) + + _check_op(series, other, operator.pow, pos_only=True) + + _check_op(series, other, lambda x, y: operator.add(y, x)) + _check_op(series, other, lambda x, y: operator.sub(y, x)) + _check_op(series, other, lambda x, y: operator.truediv(y, x)) + _check_op(series, other, lambda x, y: operator.floordiv(y, x)) + _check_op(series, other, lambda x, y: operator.mul(y, x)) + _check_op(series, other, lambda x, y: operator.pow(y, x), + pos_only=True) + _check_op(series, other, lambda x, y: operator.mod(y, x)) + + tser = tm.makeTimeSeries().rename('ts') + check(tser, tser * 2) + check(tser, tser * 0) + check(tser, tser[::2]) + check(tser, 5) + + def check_comparators(series, other, check_dtype=True): + _check_op(series, other, operator.gt, check_dtype=check_dtype) + _check_op(series, other, operator.ge, check_dtype=check_dtype) + _check_op(series, other, operator.eq, check_dtype=check_dtype) + _check_op(series, other, operator.lt, check_dtype=check_dtype) + _check_op(series, other, operator.le, check_dtype=check_dtype) + + check_comparators(tser, 5) + check_comparators(tser, tser + 1, check_dtype=False) + + # TODO: taken from tests.series.test_operators; needs cleanup + def test_divmod(self): + def check(series, other): + results = divmod(series, other) + if isinstance(other, Iterable) and len(series) != len(other): + # if the lengths don't match, this is the test where we use + # `tser[::2]`. Pad every other value in `other_np` with nan. + other_np = [] + for n in other: + other_np.append(n) + other_np.append(np.nan) + else: + other_np = other + other_np = np.asarray(other_np) + with np.errstate(all='ignore'): + expecteds = divmod(series.values, np.asarray(other_np)) + + for result, expected in zip(results, expecteds): + # check the values, name, and index separately + tm.assert_almost_equal(np.asarray(result), expected) + + assert result.name == series.name + tm.assert_index_equal(result.index, series.index) + + tser = tm.makeTimeSeries().rename('ts') + check(tser, tser * 2) + check(tser, tser * 0) + check(tser, tser[::2]) + check(tser, 5) class TestObjectDtypeEquivalence(object): # Tests that arithmetic operations match operations executed elementwise @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_nan(self, dtype): + def test_numarr_with_dtype_add_nan(self, dtype, box): ser = pd.Series([1, 2, 3], dtype=dtype) expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + result = np.nan + ser - tm.assert_series_equal(result, expected) + tm.assert_equal(result, expected) result = ser + np.nan - tm.assert_series_equal(result, expected) + tm.assert_equal(result, expected) @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_int(self, dtype): + def test_numarr_with_dtype_add_int(self, dtype, box): ser = pd.Series([1, 2, 3], dtype=dtype) expected = pd.Series([2, 3, 4], dtype=dtype) + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + result = 1 + ser - tm.assert_series_equal(result, expected) + tm.assert_equal(result, expected) result = ser + 1 - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dtype', [None, object]) - def test_df_with_dtype_radd_nan(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - - result = np.nan + df - tm.assert_frame_equal(result, expected) - - result = df + np.nan - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize('dtype', [None, object]) - def test_df_with_dtype_radd_int(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([2, 3, 4], dtype=dtype) + tm.assert_equal(result, expected) - result = 1 + df - tm.assert_frame_equal(result, expected) + # TODO: moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize('op', [operator.add, operator.sub, operator.mul, + operator.truediv, operator.floordiv]) + def test_operators_reverse_object(self, op): + # GH#56 + arr = pd.Series(np.random.randn(10), index=np.arange(10), dtype=object) - result = df + 1 - tm.assert_frame_equal(result, expected) + result = op(1., arr) + expected = op(1., arr.astype(float)) + tm.assert_series_equal(result.astype(float), expected) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index e27ba6af39b90..c02c3becbd556 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -2,12 +2,14 @@ # Arithmetc tests for DataFrame/Series/Index/Array classes that should # behave identically. # Specifically for object dtype +import operator import pytest import numpy as np import pandas as pd import pandas.util.testing as tm +from pandas.core import ops from pandas import Series, Timestamp @@ -17,6 +19,18 @@ class TestObjectComparisons(object): + def test_comparison_object_numeric_nas(self): + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) + + ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] + for op in ops: + func = getattr(operator, op) + + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) + tm.assert_series_equal(result, expected) + def test_object_comparisons(self): ser = Series(['a', 'b', np.nan, 'c', 'a']) @@ -58,32 +72,40 @@ def test_more_na_comparisons(self, dtype): # Arithmetic class TestArithmetic(object): - def test_df_radd_str(self): - df = pd.DataFrame(['x', np.nan, 'x']) - - expected = pd.DataFrame(['ax', np.nan, 'ax']) - result = 'a' + df - tm.assert_frame_equal(result, expected) - expected = pd.DataFrame(['xa', np.nan, 'xa']) - result = df + 'a' - tm.assert_frame_equal(result, expected) - - def test_series_radd_str(self): + @pytest.mark.parametrize('box', [ + pytest.param(pd.Index, + marks=pytest.mark.xfail(reason="Does not mask nulls", + strict=True, raises=TypeError)), + pd.Series, + pd.DataFrame + ], ids=lambda x: x.__name__) + def test_objarr_add_str(self, box): + ser = pd.Series(['x', np.nan, 'x']) + expected = pd.Series(['xa', np.nan, 'xa']) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = ser + 'a' + tm.assert_equal(result, expected) + + @pytest.mark.parametrize('box', [ + pytest.param(pd.Index, + marks=pytest.mark.xfail(reason="Does not mask nulls", + strict=True, raises=TypeError)), + pd.Series, + pd.DataFrame + ], ids=lambda x: x.__name__) + def test_objarr_radd_str(self, box): ser = pd.Series(['x', np.nan, 'x']) - tm.assert_series_equal('a' + ser, pd.Series(['ax', np.nan, 'ax'])) - tm.assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) + expected = pd.Series(['ax', np.nan, 'ax']) - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - @pytest.mark.parametrize('dtype', [None, object]) - def test_df_radd_str_invalid(self, dtype, data): - df = pd.DataFrame(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + df + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = 'a' + ser + tm.assert_equal(result, expected) @pytest.mark.parametrize('data', [ [1, 2, 3], @@ -91,21 +113,54 @@ def test_df_radd_str_invalid(self, dtype, data): [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT], ['x', 'y', 1]]) @pytest.mark.parametrize('dtype', [None, object]) - def test_series_radd_str_invalid(self, dtype, data): + def test_objarr_radd_str_invalid(self, dtype, data, box): ser = Series(data, dtype=dtype) + + ser = tm.box_expected(ser, box) with pytest.raises(TypeError): 'foo_' + ser - # TODO: parametrize, better name - def test_object_ser_add_invalid(self): + @pytest.mark.parametrize('op', [operator.add, ops.radd, + operator.sub, ops.rsub]) + def test_objarr_add_invalid(self, op, box): # invalid ops + if box is pd.DataFrame and op is ops.radd: + pytest.xfail(reason="DataFrame op incorrectly casts the np.array" + "case to M8[ns]") + obj_ser = tm.makeObjectSeries() obj_ser.name = 'objects' + + obj_ser = tm.box_expected(obj_ser, box) with pytest.raises(Exception): - obj_ser + 1 - with pytest.raises(Exception): - obj_ser + np.array(1, dtype=np.int64) - with pytest.raises(Exception): - obj_ser - 1 + op(obj_ser, 1) with pytest.raises(Exception): - obj_ser - np.array(1, dtype=np.int64) + op(obj_ser, np.array(1, dtype=np.int64)) + + # TODO: Moved from tests.series.test_operators; needs cleanup + def test_operators_na_handling(self): + ser = Series(['foo', 'bar', 'baz', np.nan]) + result = 'prefix_' + ser + expected = pd.Series(['prefix_foo', 'prefix_bar', + 'prefix_baz', np.nan]) + tm.assert_series_equal(result, expected) + + result = ser + '_suffix' + expected = pd.Series(['foo_suffix', 'bar_suffix', + 'baz_suffix', np.nan]) + tm.assert_series_equal(result, expected) + + # TODO: parametrize over box + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 1c677cebd3bb0..9a17dc580ff6c 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,12 +1,17 @@ # -*- coding: utf-8 -*- +import operator + import pytest import numpy as np -from pandas.compat import range +from pandas.compat import range, PY3 +import pandas.io.formats.printing as printing import pandas as pd import pandas.util.testing as tm +from pandas.tests.frame.common import _check_mixed_float, _check_mixed_int + # ------------------------------------------------------------------- # Comparisons @@ -78,3 +83,171 @@ def test_df_add_flex_filled_mixed_dtypes(self): dtype='datetime64[ns]'), 'B': ser * 2}) tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame(self): + seriesd = tm.getSeriesData() + frame = pd.DataFrame(seriesd).copy() + + mixed_float = pd.DataFrame({'A': frame['A'].copy().astype('float32'), + 'B': frame['B'].copy().astype('float32'), + 'C': frame['C'].copy().astype('float16'), + 'D': frame['D'].copy().astype('float64')}) + + intframe = pd.DataFrame({k: v.astype(int) + for k, v in seriesd.items()}) + mixed_int = pd.DataFrame({'A': intframe['A'].copy().astype('int32'), + 'B': np.ones(len(intframe), dtype='uint64'), + 'C': intframe['C'].copy().astype('uint8'), + 'D': intframe['D'].copy().astype('int64')}) + + # force these all to int64 to avoid platform testing issues + intframe = pd.DataFrame({c: s for c, s in intframe.items()}, + dtype=np.int64) + + ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] + if not PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + + for op in ops: + try: + alias = aliases.get(op, op) + f = getattr(operator, alias) + result = getattr(frame, op)(2 * frame) + exp = f(frame, 2 * frame) + tm.assert_frame_equal(result, exp) + + # vs mix float + result = getattr(mixed_float, op)(2 * mixed_float) + exp = f(mixed_float, 2 * mixed_float) + tm.assert_frame_equal(result, exp) + _check_mixed_float(result, dtype=dict(C=None)) + + # vs mix int + if op in ['add', 'sub', 'mul']: + result = getattr(mixed_int, op)(2 + mixed_int) + exp = f(mixed_int, 2 + mixed_int) + + # no overflow in the uint + dtype = None + if op in ['sub']: + dtype = dict(B='uint64', C=None) + elif op in ['add', 'mul']: + dtype = dict(C=None) + tm.assert_frame_equal(result, exp) + _check_mixed_int(result, dtype=dtype) + + # rops + r_f = lambda x, y: f(y, x) + result = getattr(frame, 'r' + op)(2 * frame) + exp = r_f(frame, 2 * frame) + tm.assert_frame_equal(result, exp) + + # vs mix float + result = getattr(mixed_float, op)(2 * mixed_float) + exp = f(mixed_float, 2 * mixed_float) + tm.assert_frame_equal(result, exp) + _check_mixed_float(result, dtype=dict(C=None)) + + result = getattr(intframe, op)(2 * intframe) + exp = f(intframe, 2 * intframe) + tm.assert_frame_equal(result, exp) + + # vs mix int + if op in ['add', 'sub', 'mul']: + result = getattr(mixed_int, op)(2 + mixed_int) + exp = f(mixed_int, 2 + mixed_int) + + # no overflow in the uint + dtype = None + if op in ['sub']: + dtype = dict(B='uint64', C=None) + elif op in ['add', 'mul']: + dtype = dict(C=None) + tm.assert_frame_equal(result, exp) + _check_mixed_int(result, dtype=dtype) + except: + printing.pprint_thing("Failing operation %r" % op) + raise + + # ndim >= 3 + ndim_5 = np.ones(frame.shape + (3, 4, 5)) + msg = "Unable to coerce to Series/DataFrame" + with tm.assert_raises_regex(ValueError, msg): + f(frame, ndim_5) + + with tm.assert_raises_regex(ValueError, msg): + getattr(frame, op)(ndim_5) + + # res_add = frame.add(frame) + # res_sub = frame.sub(frame) + # res_mul = frame.mul(frame) + # res_div = frame.div(2 * frame) + + # tm.assert_frame_equal(res_add, frame + frame) + # tm.assert_frame_equal(res_sub, frame - frame) + # tm.assert_frame_equal(res_mul, frame * frame) + # tm.assert_frame_equal(res_div, frame / (2 * frame)) + + const_add = frame.add(1) + tm.assert_frame_equal(const_add, frame + 1) + + # corner cases + result = frame.add(frame[:0]) + tm.assert_frame_equal(result, frame * np.nan) + + result = frame[:0].add(frame) + tm.assert_frame_equal(result, frame * np.nan) + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + frame.add(frame.iloc[0], fill_value=3) + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + frame.add(frame.iloc[0], axis='index', fill_value=3) + + def test_arith_flex_series(self): + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + df = pd.DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + row = df.xs('a') + col = df['two'] + # after arithmetic refactor, add truediv here + ops = ['add', 'sub', 'mul', 'mod'] + for op in ops: + f = getattr(df, op) + op = getattr(operator, op) + tm.assert_frame_equal(f(row), op(df, row)) + tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + + # special case for some reason + tm.assert_frame_equal(df.add(row, axis=None), df + row) + + # cases which will be refactored after big arithmetic refactor + tm.assert_frame_equal(df.div(row), df / row) + tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) + + # broadcasting issue in GH#7325 + df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64') + expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis='index') + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='float64') + expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis='index') + tm.assert_frame_equal(result, expected) + + def test_arith_flex_zero_len_raises(self): + # GH#19522 passing fill_value to frame flex arith methods should + # raise even in the zero-length special cases + ser_len0 = pd.Series([]) + df_len0 = pd.DataFrame([], columns=['A', 'B']) + df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df.add(ser_len0, fill_value='E') + + with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + df_len0.sub(df['A'], axis=None, fill_value=3) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index a11d673fd5d7f..2fc59c5003a4d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -8,7 +8,7 @@ import pytest -from numpy import nan, random +from numpy import nan import numpy as np from pandas.compat import range @@ -16,7 +16,6 @@ from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) import pandas.core.common as com -import pandas.io.formats.printing as printing import pandas as pd from pandas.util.testing import (assert_numpy_array_equal, @@ -25,66 +24,11 @@ import pandas.util.testing as tm -from pandas.tests.frame.common import (TestData, _check_mixed_float, - _check_mixed_int) +from pandas.tests.frame.common import TestData, _check_mixed_float class TestDataFrameOperators(TestData): - def test_operators(self): - garbage = random.random(4) - colSeries = Series(garbage, index=np.array(self.frame.columns)) - - idSum = self.frame + self.frame - seriesSum = self.frame + colSeries - - for col, series in compat.iteritems(idSum): - for idx, val in compat.iteritems(series): - origVal = self.frame[col][idx] * 2 - if not np.isnan(val): - assert val == origVal - else: - assert np.isnan(origVal) - - for col, series in compat.iteritems(seriesSum): - for idx, val in compat.iteritems(series): - origVal = self.frame[col][idx] + colSeries[col] - if not np.isnan(val): - assert val == origVal - else: - assert np.isnan(origVal) - - added = self.frame2 + self.frame2 - expected = self.frame2 * 2 - assert_frame_equal(added, expected) - - df = DataFrame({'a': ['a', None, 'b']}) - assert_frame_equal(df + df, DataFrame({'a': ['aa', np.nan, 'bb']})) - - # Test for issue #10181 - for dtype in ('float', 'int64'): - frames = [ - DataFrame(dtype=dtype), - DataFrame(columns=['A'], dtype=dtype), - DataFrame(index=[0], dtype=dtype), - ] - for df in frames: - assert (df + df).equals(df) - assert_frame_equal(df + df, df) - - @pytest.mark.parametrize('other', [nan, 7, -23, 2.718, -3.14, np.inf]) - def test_ops_np_scalar(self, other): - vals = np.random.randn(5, 3) - f = lambda x: DataFrame(x, index=list('ABCDE'), - columns=['jim', 'joe', 'jolie']) - - df = f(vals) - - assert_frame_equal(df / np.array(other), f(vals / other)) - assert_frame_equal(np.array(other) * df, f(vals * other)) - assert_frame_equal(df + np.array(other), f(vals + other)) - assert_frame_equal(np.array(other) - df, f(other - vals)) - def test_operators_boolean(self): # GH 5808 @@ -347,122 +291,6 @@ def test_pos_raises(self, df): with pytest.raises(TypeError): (+ df['a']) - def test_arith_flex_frame(self): - ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod'] - if not compat.PY3: - aliases = {} - else: - aliases = {'div': 'truediv'} - - for op in ops: - try: - alias = aliases.get(op, op) - f = getattr(operator, alias) - result = getattr(self.frame, op)(2 * self.frame) - exp = f(self.frame, 2 * self.frame) - assert_frame_equal(result, exp) - - # vs mix float - result = getattr(self.mixed_float, op)(2 * self.mixed_float) - exp = f(self.mixed_float, 2 * self.mixed_float) - assert_frame_equal(result, exp) - _check_mixed_float(result, dtype=dict(C=None)) - - # vs mix int - if op in ['add', 'sub', 'mul']: - result = getattr(self.mixed_int, op)(2 + self.mixed_int) - exp = f(self.mixed_int, 2 + self.mixed_int) - - # no overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B='uint64', C=None) - elif op in ['add', 'mul']: - dtype = dict(C=None) - assert_frame_equal(result, exp) - _check_mixed_int(result, dtype=dtype) - - # rops - r_f = lambda x, y: f(y, x) - result = getattr(self.frame, 'r' + op)(2 * self.frame) - exp = r_f(self.frame, 2 * self.frame) - assert_frame_equal(result, exp) - - # vs mix float - result = getattr(self.mixed_float, op)( - 2 * self.mixed_float) - exp = f(self.mixed_float, 2 * self.mixed_float) - assert_frame_equal(result, exp) - _check_mixed_float(result, dtype=dict(C=None)) - - result = getattr(self.intframe, op)(2 * self.intframe) - exp = f(self.intframe, 2 * self.intframe) - assert_frame_equal(result, exp) - - # vs mix int - if op in ['add', 'sub', 'mul']: - result = getattr(self.mixed_int, op)( - 2 + self.mixed_int) - exp = f(self.mixed_int, 2 + self.mixed_int) - - # no overflow in the uint - dtype = None - if op in ['sub']: - dtype = dict(B='uint64', C=None) - elif op in ['add', 'mul']: - dtype = dict(C=None) - assert_frame_equal(result, exp) - _check_mixed_int(result, dtype=dtype) - except: - printing.pprint_thing("Failing operation %r" % op) - raise - - # ndim >= 3 - ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) - msg = "Unable to coerce to Series/DataFrame" - with tm.assert_raises_regex(ValueError, msg): - f(self.frame, ndim_5) - - with tm.assert_raises_regex(ValueError, msg): - getattr(self.frame, op)(ndim_5) - - # res_add = self.frame.add(self.frame) - # res_sub = self.frame.sub(self.frame) - # res_mul = self.frame.mul(self.frame) - # res_div = self.frame.div(2 * self.frame) - - # assert_frame_equal(res_add, self.frame + self.frame) - # assert_frame_equal(res_sub, self.frame - self.frame) - # assert_frame_equal(res_mul, self.frame * self.frame) - # assert_frame_equal(res_div, self.frame / (2 * self.frame)) - - const_add = self.frame.add(1) - assert_frame_equal(const_add, self.frame + 1) - - # corner cases - result = self.frame.add(self.frame[:0]) - assert_frame_equal(result, self.frame * np.nan) - - result = self.frame[:0].add(self.frame) - assert_frame_equal(result, self.frame * np.nan) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - self.frame.add(self.frame.iloc[0], fill_value=3) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - self.frame.add(self.frame.iloc[0], axis='index', fill_value=3) - - def test_arith_flex_zero_len_raises(self): - # GH#19522 passing fill_value to frame flex arith methods should - # raise even in the zero-length special cases - ser_len0 = pd.Series([]) - df_len0 = pd.DataFrame([], columns=['A', 'B']) - df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - df.add(ser_len0, fill_value='E') - - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): - df_len0.sub(df['A'], axis=None, fill_value=3) - def test_binary_ops_align(self): # test aligning binary ops @@ -690,37 +518,6 @@ def test_dti_tz_convert_to_utc(self): exp = DataFrame({'A': [np.nan, 3, np.nan]}, index=base) assert_frame_equal(df1 + df2, exp) - def test_arith_flex_series(self): - df = self.simple - - row = df.xs('a') - col = df['two'] - # after arithmetic refactor, add truediv here - ops = ['add', 'sub', 'mul', 'mod'] - for op in ops: - f = getattr(df, op) - op = getattr(operator, op) - assert_frame_equal(f(row), op(df, row)) - assert_frame_equal(f(col, axis=0), op(df.T, col).T) - - # special case for some reason - assert_frame_equal(df.add(row, axis=None), df + row) - - # cases which will be refactored after big arithmetic refactor - assert_frame_equal(df.div(row), df / row) - assert_frame_equal(df.div(col, axis=0), (df.T / col).T) - - # broadcasting issue in GH7325 - df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64') - expected = DataFrame([[nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) - result = df.div(df[0], axis='index') - assert_frame_equal(result, expected) - - df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='float64') - expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) - result = df.div(df[0], axis='index') - assert_frame_equal(result, expected) - def test_arith_non_pandas_object(self): df = self.simple diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 15b30ec2c774a..41064b84abc36 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from datetime import timedelta import operator import numpy as np @@ -75,33 +74,3 @@ def test_ser_cmp_result_names(self, names, op): ser = Series(cidx).rename(names[1]) result = op(ser, cidx) assert result.name == names[2] - - -class TestTimedeltaSeriesComparisons(object): - def test_compare_timedelta_series(self): - # regresssion test for GH5963 - s = pd.Series([timedelta(days=1), timedelta(days=2)]) - actual = s > timedelta(days=1) - expected = pd.Series([False, True]) - tm.assert_series_equal(actual, expected) - - -# ------------------------------------------------------------------ -# Arithmetic - -class TestSeriesArithmetic(object): - # Standard, numeric, or otherwise not-Timestamp/Timedelta/Period dtypes - - @pytest.mark.parametrize('dtype', [None, object]) - def test_series_with_dtype_radd_timedelta(self, dtype): - # note this test is _not_ aimed at timedelta64-dtyped Series - ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - - result = pd.Timedelta('3 days') + ser - tm.assert_series_equal(result, expected) - - result = ser + pd.Timedelta('3 days') - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index c5d8d93bd768b..5e5e9c0895ccf 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -3,10 +3,8 @@ import pytest -from collections import Iterable from datetime import datetime, timedelta import operator -from itertools import product, starmap from numpy import nan import numpy as np @@ -15,13 +13,12 @@ from pandas import (Index, Series, DataFrame, isna, bdate_range, NaT, date_range, timedelta_range, Categorical) from pandas.core.indexes.datetimes import Timestamp -from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops -from pandas.compat import range, zip +from pandas.compat import range from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, - assert_frame_equal, assert_index_equal) + assert_frame_equal) import pandas.util.testing as tm from .common import TestData @@ -182,39 +179,6 @@ def test_bool_operators_with_nas(self, bool_op): expected[mask] = False assert_series_equal(result, expected) - def test_comparison_object_numeric_nas(self): - ser = Series(np.random.randn(10), dtype=object) - shifted = ser.shift(2) - - ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] - for op in ops: - func = getattr(operator, op) - - result = func(ser, shifted) - expected = func(ser.astype(float), shifted.astype(float)) - assert_series_equal(result, expected) - - def test_comparison_invalid(self): - # GH4968 - # invalid date/int comparisons - s = Series(range(5)) - s2 = Series(date_range('20010101', periods=5)) - - for (x, y) in [(s, s2), (s2, s)]: - - result = x == y - expected = Series([False] * 5) - assert_series_equal(result, expected) - - result = x != y - expected = Series([True] * 5) - assert_series_equal(result, expected) - - pytest.raises(TypeError, lambda: x >= y) - pytest.raises(TypeError, lambda: x > y) - pytest.raises(TypeError, lambda: x < y) - pytest.raises(TypeError, lambda: x <= y) - def test_unequal_categorical_comparison_raises_type_error(self): # unequal comparison should raise for unordered cats cat = Series(Categorical(list("abc"))) @@ -498,158 +462,6 @@ def test_comp_ops_df_compat(self): left.to_frame() < right.to_frame() -class TestTimedeltaSeriesArithmetic(object): - - def test_operators_timedelta64(self): - # series ops - v1 = date_range('2012-1-1', periods=3, freq='D') - v2 = date_range('2012-1-2', periods=3, freq='D') - rs = Series(v2) - Series(v1) - xp = Series(1e9 * 3600 * 24, - rs.index).astype('int64').astype('timedelta64[ns]') - assert_series_equal(rs, xp) - assert rs.dtype == 'timedelta64[ns]' - - df = DataFrame(dict(A=v1)) - td = Series([timedelta(days=i) for i in range(3)]) - assert td.dtype == 'timedelta64[ns]' - - # series on the rhs - result = df['A'] - df['A'].shift() - assert result.dtype == 'timedelta64[ns]' - - result = df['A'] + td - assert result.dtype == 'M8[ns]' - - # scalar Timestamp on rhs - maxa = df['A'].max() - assert isinstance(maxa, Timestamp) - - resultb = df['A'] - df['A'].max() - assert resultb.dtype == 'timedelta64[ns]' - - # timestamp on lhs - result = resultb + df['A'] - values = [Timestamp('20111230'), Timestamp('20120101'), - Timestamp('20120103')] - expected = Series(values, name='A') - assert_series_equal(result, expected) - - # datetimes on rhs - result = df['A'] - datetime(2001, 1, 1) - expected = Series( - [timedelta(days=4017 + i) for i in range(3)], name='A') - assert_series_equal(result, expected) - assert result.dtype == 'm8[ns]' - - d = datetime(2001, 1, 1, 3, 4) - resulta = df['A'] - d - assert resulta.dtype == 'm8[ns]' - - # roundtrip - resultb = resulta + d - assert_series_equal(df['A'], resultb) - - # timedeltas on rhs - td = timedelta(days=1) - resulta = df['A'] + td - resultb = resulta - td - assert_series_equal(resultb, df['A']) - assert resultb.dtype == 'M8[ns]' - - # roundtrip - td = timedelta(minutes=5, seconds=3) - resulta = df['A'] + td - resultb = resulta - td - assert_series_equal(df['A'], resultb) - assert resultb.dtype == 'M8[ns]' - - # inplace - value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) - rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) - assert rs[2] == value - - def test_timedelta64_ops_nat(self): - # GH 11349 - timedelta_series = Series([NaT, Timedelta('1s')]) - nat_series_dtype_timedelta = Series([NaT, NaT], - dtype='timedelta64[ns]') - single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') - - # subtraction - assert_series_equal(timedelta_series - NaT, - nat_series_dtype_timedelta) - assert_series_equal(-NaT + timedelta_series, - nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(-single_nat_dtype_timedelta + timedelta_series, - nat_series_dtype_timedelta) - - # addition - assert_series_equal(nat_series_dtype_timedelta + NaT, - nat_series_dtype_timedelta) - assert_series_equal(NaT + nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series + NaT, - nat_series_dtype_timedelta) - assert_series_equal(NaT + timedelta_series, - nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series + single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + timedelta_series, - nat_series_dtype_timedelta) - - assert_series_equal(nat_series_dtype_timedelta + NaT, - nat_series_dtype_timedelta) - assert_series_equal(NaT + nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_timedelta, - nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - # multiplication - assert_series_equal(nat_series_dtype_timedelta * 1.0, - nat_series_dtype_timedelta) - assert_series_equal(1.0 * nat_series_dtype_timedelta, - nat_series_dtype_timedelta) - - assert_series_equal(timedelta_series * 1, timedelta_series) - assert_series_equal(1 * timedelta_series, timedelta_series) - - assert_series_equal(timedelta_series * 1.5, - Series([NaT, Timedelta('1.5s')])) - assert_series_equal(1.5 * timedelta_series, - Series([NaT, Timedelta('1.5s')])) - - assert_series_equal(timedelta_series * nan, - nat_series_dtype_timedelta) - assert_series_equal(nan * timedelta_series, - nat_series_dtype_timedelta) - - # division - assert_series_equal(timedelta_series / 2, - Series([NaT, Timedelta('0.5s')])) - assert_series_equal(timedelta_series / 2.0, - Series([NaT, Timedelta('0.5s')])) - assert_series_equal(timedelta_series / nan, - nat_series_dtype_timedelta) - - class TestDatetimeSeriesArithmetic(object): def test_operators_datetimelike_invalid(self, all_arithmetic_operators): @@ -739,8 +551,10 @@ class TestSeriesOperators(TestData): 'truediv', 'div', 'pow']) def test_op_method(self, opname, ts): # check that Series.{opname} behaves like Series.__{opname}__, - series = ts[0](self.ts) - other = ts[1](self.ts) + tser = tm.makeTimeSeries().rename('ts') + + series = ts[0](tser) + other = ts[1](tser) check_reverse = ts[2] if opname == 'div' and compat.PY3: @@ -768,188 +582,11 @@ def test_neg(self): def test_invert(self): assert_series_equal(-(self.series < 0), ~(self.series < 0)) - def test_operators(self): - def _check_op(series, other, op, pos_only=False, - check_dtype=True): - left = np.abs(series) if pos_only else series - right = np.abs(other) if pos_only else other - - cython_or_numpy = op(left, right) - python = left.combine(right, op) - assert_series_equal(cython_or_numpy, python, - check_dtype=check_dtype) - - def check(series, other): - simple_ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod'] - - for opname in simple_ops: - _check_op(series, other, getattr(operator, opname)) - - _check_op(series, other, operator.pow, pos_only=True) - - _check_op(series, other, lambda x, y: operator.add(y, x)) - _check_op(series, other, lambda x, y: operator.sub(y, x)) - _check_op(series, other, lambda x, y: operator.truediv(y, x)) - _check_op(series, other, lambda x, y: operator.floordiv(y, x)) - _check_op(series, other, lambda x, y: operator.mul(y, x)) - _check_op(series, other, lambda x, y: operator.pow(y, x), - pos_only=True) - _check_op(series, other, lambda x, y: operator.mod(y, x)) - - check(self.ts, self.ts * 2) - check(self.ts, self.ts * 0) - check(self.ts, self.ts[::2]) - check(self.ts, 5) - - def check_comparators(series, other, check_dtype=True): - _check_op(series, other, operator.gt, check_dtype=check_dtype) - _check_op(series, other, operator.ge, check_dtype=check_dtype) - _check_op(series, other, operator.eq, check_dtype=check_dtype) - _check_op(series, other, operator.lt, check_dtype=check_dtype) - _check_op(series, other, operator.le, check_dtype=check_dtype) - - check_comparators(self.ts, 5) - check_comparators(self.ts, self.ts + 1, check_dtype=False) - - def test_divmod(self): - def check(series, other): - results = divmod(series, other) - if isinstance(other, Iterable) and len(series) != len(other): - # if the lengths don't match, this is the test where we use - # `self.ts[::2]`. Pad every other value in `other_np` with nan. - other_np = [] - for n in other: - other_np.append(n) - other_np.append(np.nan) - else: - other_np = other - other_np = np.asarray(other_np) - with np.errstate(all='ignore'): - expecteds = divmod(series.values, np.asarray(other_np)) - - for result, expected in zip(results, expecteds): - # check the values, name, and index separately - assert_almost_equal(np.asarray(result), expected) - - assert result.name == series.name - assert_index_equal(result.index, series.index) - - check(self.ts, self.ts * 2) - check(self.ts, self.ts * 0) - check(self.ts, self.ts[::2]) - check(self.ts, 5) - def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) assert_series_equal(s1 * s2, Series([np.nan], index=['x'])) - @pytest.mark.parametrize("m", [1, 3, 10]) - @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) - def test_timedelta64_conversions(self, m, unit): - - startdate = Series(date_range('2013-01-01', '2013-01-03')) - enddate = Series(date_range('2013-03-01', '2013-03-03')) - - s1 = enddate - startdate - s1[2] = np.nan - - # op - expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) - result = s1 / np.timedelta64(m, unit) - assert_series_equal(result, expected) - - # reverse op - expected = s1.apply( - lambda x: Timedelta(np.timedelta64(m, unit)) / x) - result = np.timedelta64(m, unit) / s1 - assert_series_equal(result, expected) - - @pytest.mark.parametrize('op', [operator.add, operator.sub]) - def test_timedelta64_equal_timedelta_supported_ops(self, op): - ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), - Timestamp('20130228 22:00:00'), - Timestamp('20130228 21:00:00')]) - - intervals = 'D', 'h', 'm', 's', 'us' - - # TODO: unused - # npy16_mappings = {'D': 24 * 60 * 60 * 1000000, - # 'h': 60 * 60 * 1000000, - # 'm': 60 * 1000000, - # 's': 1000000, - # 'us': 1} - - def timedelta64(*args): - return sum(starmap(np.timedelta64, zip(args, intervals))) - - for d, h, m, s, us in product(*([range(2)] * 5)): - nptd = timedelta64(d, h, m, s, us) - pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, - microseconds=us) - lhs = op(ser, nptd) - rhs = op(ser, pytd) - - assert_series_equal(lhs, rhs) - - def test_ops_nat_mixed_datetime64_timedelta64(self): - # GH 11349 - timedelta_series = Series([NaT, Timedelta('1s')]) - datetime_series = Series([NaT, Timestamp('19900315')]) - nat_series_dtype_timedelta = Series([NaT, NaT], - dtype='timedelta64[ns]') - nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') - single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') - single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') - - # subtraction - assert_series_equal(datetime_series - single_nat_dtype_datetime, - nat_series_dtype_timedelta) - - assert_series_equal(datetime_series - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + datetime_series, - nat_series_dtype_timestamp) - - # without a Series wrapping the NaT, it is ambiguous - # whether it is a datetime64 or timedelta64 - # defaults to interpreting it as timedelta64 - assert_series_equal(nat_series_dtype_timestamp - - single_nat_dtype_datetime, - nat_series_dtype_timedelta) - - assert_series_equal(nat_series_dtype_timestamp - - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + - nat_series_dtype_timestamp, - nat_series_dtype_timestamp) - - with pytest.raises(TypeError): - timedelta_series - single_nat_dtype_datetime - - # addition - assert_series_equal(nat_series_dtype_timestamp + - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timestamp, - nat_series_dtype_timestamp) - - assert_series_equal(nat_series_dtype_timestamp + - single_nat_dtype_timedelta, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + - nat_series_dtype_timestamp, - nat_series_dtype_timestamp) - - assert_series_equal(nat_series_dtype_timedelta + - single_nat_dtype_datetime, - nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_datetime + - nat_series_dtype_timedelta, - nat_series_dtype_timestamp) - def test_ops_datetimelike_align(self): # GH 7500 # datetimelike ops need to align @@ -1136,16 +773,6 @@ def test_operators_corner(self): index=self.ts.index[:-5], name='ts') tm.assert_series_equal(added[:-5], expected) - @pytest.mark.parametrize('op', [operator.add, operator.sub, operator.mul, - operator.truediv, operator.floordiv]) - def test_operators_reverse_object(self, op): - # GH 56 - arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) - - result = op(1., arr) - expected = op(1., arr.astype(float)) - assert_series_equal(result.astype(float), expected) - pairings = [] for op in ['add', 'sub', 'mul', 'pow', 'truediv', 'floordiv']: fv = 0 @@ -1214,41 +841,6 @@ def test_operators_na_handling(self): assert isna(result[0]) assert isna(result2[0]) - s = Series(['foo', 'bar', 'baz', np.nan]) - result = 'prefix_' + s - expected = Series(['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan]) - assert_series_equal(result, expected) - - result = s + '_suffix' - expected = Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan]) - assert_series_equal(result, expected) - - def test_datetime64_with_index(self): - # arithmetic integer ops with an index - ser = Series(np.random.randn(5)) - expected = ser - ser.index.to_series() - result = ser - ser.index - assert_series_equal(result, expected) - - # GH 4629 - # arithmetic datetime64 ops with an index - ser = Series(date_range('20130101', periods=5), - index=date_range('20130101', periods=5)) - expected = ser - ser.index.to_series() - result = ser - ser.index - assert_series_equal(result, expected) - - with pytest.raises(TypeError): - # GH#18850 - result = ser - ser.index.to_period() - - df = DataFrame(np.random.randn(5, 2), - index=date_range('20130101', periods=5)) - df['date'] = Timestamp('20130102') - df['expected'] = df['date'] - df.index.to_series() - df['result'] = df['date'] - df.index - assert_series_equal(df['result'], df['expected'], check_names=False) - def test_op_duplicate_index(self): # GH14227 s1 = Series([1, 2], index=[1, 1]) @@ -1306,43 +898,6 @@ def test_idxminmax_with_inf(self): class TestSeriesOperationsDataFrameCompat(object): - def test_operators_frame(self): - # rpow does not work with DataFrame - ts = tm.makeTimeSeries() - ts.name = 'ts' - - df = DataFrame({'A': ts}) - - assert_series_equal(ts + ts, ts + df['A'], - check_names=False) - assert_series_equal(ts ** ts, ts ** df['A'], - check_names=False) - assert_series_equal(ts < ts, ts < df['A'], - check_names=False) - assert_series_equal(ts / ts, ts / df['A'], - check_names=False) - - def test_series_frame_radd_bug(self): - # GH#353 - vals = Series(tm.rands_array(5, 10)) - result = 'foo_' + vals - expected = vals.map(lambda x: 'foo_' + x) - assert_series_equal(result, expected) - - frame = DataFrame({'vals': vals}) - result = 'foo_' + frame - expected = DataFrame({'vals': vals.map(lambda x: 'foo_' + x)}) - assert_frame_equal(result, expected) - - ts = tm.makeTimeSeries() - ts.name = 'ts' - - # really raise this time - with pytest.raises(TypeError): - datetime.now() + ts - - with pytest.raises(TypeError): - ts + datetime.now() def test_bool_ops_df_compat(self): # GH 1134 @@ -1401,32 +956,3 @@ def test_bool_ops_df_compat(self): index=list('ABCD')) assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) - - def test_arith_ops_df_compat(self): - # GH#1134 - s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x') - - exp = pd.Series([3.0, 4.0, np.nan, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s1 + s2, exp) - assert_series_equal(s2 + s1, exp) - - exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) - - # different length - s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') - s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - - exp = pd.Series([3, 4, 5, np.nan], - index=list('ABCD'), name='x') - assert_series_equal(s3 + s4, exp) - assert_series_equal(s4 + s3, exp) - - exp = pd.DataFrame({'x': [3, 4, 5, np.nan]}, - index=list('ABCD')) - assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index c4bc9c71927fb..30d69e243c446 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Arithmetc tests for DataFrame/Series/Index/Array classes that should # behave identically. -from datetime import timedelta +from datetime import datetime, timedelta import operator import pytest @@ -15,7 +15,8 @@ from pandas._libs.tslibs import IncompatibleFrequency from pandas import ( timedelta_range, - Timedelta, Timestamp, NaT, Series, TimedeltaIndex, DatetimeIndex) + Timedelta, Timestamp, NaT, Series, TimedeltaIndex, DatetimeIndex, + DataFrame) # ------------------------------------------------------------------ @@ -78,6 +79,13 @@ def box_df_fail(request): class TestTimedelta64ArrayComparisons(object): # TODO: All of these need to be parametrized over box + def test_compare_timedelta_series(self): + # regresssion test for GH5963 + s = pd.Series([timedelta(days=1), timedelta(days=2)]) + actual = s > timedelta(days=1) + expected = pd.Series([False, True]) + tm.assert_series_equal(actual, expected) + def test_tdi_cmp_str_invalid(self): # GH#13624 tdi = TimedeltaIndex(['1 day', '2 days']) @@ -242,6 +250,157 @@ def test_tdi_add_overflow(self): class TestTimedeltaArraylikeAddSubOps(object): # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ + # TODO: moved from tests.series.test_operators, needs splitting, cleanup, + # de-duplication, box-parametrization... + def test_operators_timedelta64(self): + # series ops + v1 = pd.date_range('2012-1-1', periods=3, freq='D') + v2 = pd.date_range('2012-1-2', periods=3, freq='D') + rs = Series(v2) - Series(v1) + xp = Series(1e9 * 3600 * 24, + rs.index).astype('int64').astype('timedelta64[ns]') + tm.assert_series_equal(rs, xp) + assert rs.dtype == 'timedelta64[ns]' + + df = DataFrame(dict(A=v1)) + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == 'timedelta64[ns]' + + # series on the rhs + result = df['A'] - df['A'].shift() + assert result.dtype == 'timedelta64[ns]' + + result = df['A'] + td + assert result.dtype == 'M8[ns]' + + # scalar Timestamp on rhs + maxa = df['A'].max() + assert isinstance(maxa, Timestamp) + + resultb = df['A'] - df['A'].max() + assert resultb.dtype == 'timedelta64[ns]' + + # timestamp on lhs + result = resultb + df['A'] + values = [Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')] + expected = Series(values, name='A') + tm.assert_series_equal(result, expected) + + # datetimes on rhs + result = df['A'] - datetime(2001, 1, 1) + expected = Series( + [timedelta(days=4017 + i) for i in range(3)], name='A') + tm.assert_series_equal(result, expected) + assert result.dtype == 'm8[ns]' + + d = datetime(2001, 1, 1, 3, 4) + resulta = df['A'] - d + assert resulta.dtype == 'm8[ns]' + + # roundtrip + resultb = resulta + d + tm.assert_series_equal(df['A'], resultb) + + # timedeltas on rhs + td = timedelta(days=1) + resulta = df['A'] + td + resultb = resulta - td + tm.assert_series_equal(resultb, df['A']) + assert resultb.dtype == 'M8[ns]' + + # roundtrip + td = timedelta(minutes=5, seconds=3) + resulta = df['A'] + td + resultb = resulta - td + tm.assert_series_equal(df['A'], resultb) + assert resultb.dtype == 'M8[ns]' + + # inplace + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) + assert rs[2] == value + + def test_timedelta64_ops_nat(self): + # GH 11349 + timedelta_series = Series([NaT, Timedelta('1s')]) + nat_series_dtype_timedelta = Series([NaT, NaT], + dtype='timedelta64[ns]') + single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') + + # subtraction + tm.assert_series_equal(timedelta_series - NaT, + nat_series_dtype_timedelta) + tm.assert_series_equal(-NaT + timedelta_series, + nat_series_dtype_timedelta) + + tm.assert_series_equal(timedelta_series - single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + tm.assert_series_equal(-single_nat_dtype_timedelta + timedelta_series, + nat_series_dtype_timedelta) + + # addition + tm.assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + tm.assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + tm.assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + tm.assert_series_equal(timedelta_series + NaT, + nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + timedelta_series, + nat_series_dtype_timedelta) + + tm.assert_series_equal(timedelta_series + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + tm.assert_series_equal(single_nat_dtype_timedelta + timedelta_series, + nat_series_dtype_timedelta) + + tm.assert_series_equal(nat_series_dtype_timedelta + NaT, + nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + tm.assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, + nat_series_dtype_timedelta) + tm.assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + # multiplication + tm.assert_series_equal(nat_series_dtype_timedelta * 1.0, + nat_series_dtype_timedelta) + tm.assert_series_equal(1.0 * nat_series_dtype_timedelta, + nat_series_dtype_timedelta) + + tm.assert_series_equal(timedelta_series * 1, timedelta_series) + tm.assert_series_equal(1 * timedelta_series, timedelta_series) + + tm.assert_series_equal(timedelta_series * 1.5, + Series([NaT, Timedelta('1.5s')])) + tm.assert_series_equal(1.5 * timedelta_series, + Series([NaT, Timedelta('1.5s')])) + + tm.assert_series_equal(timedelta_series * np.nan, + nat_series_dtype_timedelta) + tm.assert_series_equal(np.nan * timedelta_series, + nat_series_dtype_timedelta) + + # division + tm.assert_series_equal(timedelta_series / 2, + Series([NaT, Timedelta('0.5s')])) + tm.assert_series_equal(timedelta_series / 2.0, + Series([NaT, Timedelta('0.5s')])) + tm.assert_series_equal(timedelta_series / np.nan, + nat_series_dtype_timedelta) + # ------------------------------------------------------------- # Invalid Operations @@ -967,6 +1126,27 @@ class TestTimedeltaArraylikeMulDivOps(object): # Tests for timedelta64[ns] # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + # TODO: Moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) + def test_timedelta64_conversions(self, m, unit): + startdate = Series(pd.date_range('2013-01-01', '2013-01-03')) + enddate = Series(pd.date_range('2013-03-01', '2013-03-03')) + + ser = enddate - startdate + ser[2] = np.nan + + # op + expected = Series([x / np.timedelta64(m, unit) for x in ser]) + result = ser / np.timedelta64(m, unit) + tm.assert_series_equal(result, expected) + + # reverse op + expected = Series([Timedelta(np.timedelta64(m, unit)) / x + for x in ser]) + result = np.timedelta64(m, unit) / ser + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Multiplication # organized with scalar others first, then array-like