From 216b8e560d35627f440af359a909f0b3f053bd01 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 10 Apr 2014 09:12:07 -0400 Subject: [PATCH 1/2] API: allow Series comparison ops to align before comparison (GH1134) --- doc/source/release.rst | 1 + doc/source/v0.14.0.txt | 19 +++++++++++++++++++ pandas/core/ops.py | 15 ++++++++++----- pandas/tests/test_series.py | 34 +++++++++++++++++++++++++--------- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index a23936ae154c0..6838eb9c90581 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -171,6 +171,7 @@ API Changes - default sorting algorithm for ``Series.order`` is not ``quicksort``, to conform with ``Series.sort`` (and numpy defaults) - add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- Series comparison operations (e.g. ``x == y``) now align on the index before comparing (:issue:`1134`) Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index ded10fd75e8d4..9d54043085b4f 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -213,6 +213,25 @@ API changes - default sorting algorithm for ``Series.order`` is not ``quicksort``, to conform with ``Series.sort`` (and numpy defaults) - add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- Series comparison operations (e.g. ``x == y``) now align on the index before comparing (:issue:`1134`) + + Comparing two Series that hold the same labels in a different order: + + .. ipython:: python + + s1 = Series(index=["A", "B", "C"], data=[1,2,3]) + s1 + s2 = Series(index=["C", "B", "A"], data=[3,2,1]) + s2 + s1 == s2 + + In the following example, label 'A' is missing from ``s3``; it is aligned to ``nan``, so ``==`` compares ``False`` there + + .. ipython:: python + + s3 = Series(index=["C", "B"], data=[3,2]) + s3 + s1 == s3 .. 
_whatsnew_0140.sql: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b8e92fb25cec5..8762d5edddd82 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -537,13 +537,18 @@ def na_op(x, y): def wrapper(self, other): if isinstance(other, pd.Series): name = _maybe_match_name(self, other) - if len(self) != len(other): - raise ValueError('Series lengths must match to compare') - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) + if self.index.equals(other.index): + s1, s2 = self, other + index = self.index + else: + index = self.index + other.index + s1 = self.reindex(index) + s2 = other.reindex(index) + return self._constructor(na_op(s1.values, s2.values), + index=index, name=name) elif isinstance(other, pd.DataFrame): # pragma: no cover return NotImplemented - elif isinstance(other, (pa.Array, pd.Series)): + elif isinstance(other, pa.Array): if len(self) != len(other): raise ValueError('Lengths must match to compare') return self._constructor(na_op(self.values, np.asarray(other)), diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 744a020347af9..daa76689621fa 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2354,6 +2354,31 @@ def check_comparators(series, other): check_comparators(self.ts, 5) check_comparators(self.ts, self.ts + 1) + def test_align_eq(self): + + # GH 1134 + # eq should align! 
+ + # needs alignment + s1 = Series([1,2], ['a','b']) + s2 = Series([2,3], ['b','c']) + result1 = s1 == s2 + result2 = s2 == s1 + index = s1.index+s2.index + expected = s1.reindex(index) == s2.reindex(index) + assert_series_equal(result1,expected) + assert_series_equal(result2,expected) + + # differs in order + s1 = Series(index=["A", "B", "C"], data=[1,2,3]) + s2 = Series(index=["C", "B", "A"], data=[3,2,1]) + result1 = s1 == s2 + result2 = s2 == s1 + index = s1.index+s2.index + expected = s1.reindex(index) == s2.reindex(index) + assert_series_equal(result1,expected) + assert_series_equal(result2,expected) + def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) @@ -3214,15 +3239,6 @@ def test_more_na_comparisons(self): expected = Series([True, True, True]) assert_series_equal(result, expected) - def test_comparison_different_length(self): - a = Series(['a', 'b', 'c']) - b = Series(['b', 'a']) - self.assertRaises(ValueError, a.__lt__, b) - - a = Series([1, 2]) - b = Series([2, 3, 4]) - self.assertRaises(ValueError, a.__eq__, b) - def test_comparison_label_based(self): # GH 4947 From 492b6cfbb4d9fdf43ff07fc0f8f31631042e768c Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 11 Apr 2014 20:51:06 -0400 Subject: [PATCH 2/2] TST: clean test_ujson/testSeries tests for Series non-alignable comparisons --- pandas/io/tests/test_json/test_ujson.py | 27 ++++++++++++++----------- pandas/tests/test_series.py | 6 ++++++ pandas/util/testing.py | 16 +++++++-------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 36963d193e5ae..6af45a81fe3eb 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1241,48 +1241,51 @@ def testDataFrameNumpyLabelled(self): assert_array_equal(df.index, outp.index) def testSeries(self): + s = Series([10, 20, 30, 40, 50, 60], name="series", 
index=[6,7,8,9,10,15]) s.sort() + def check(x,y): + tm.assert_series_equal(x,y,check_index_type=False) + y.index = Index(outp.astype('int64')) + tm.assert_series_equal(x,y) + # column indexed outp = Series(ujson.decode(ujson.encode(s))) outp.sort() - self.assertTrue((s == outp).values.all()) outp = Series(ujson.decode(ujson.encode(s), numpy=True)) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"))) outp = Series(**dec) - self.assertTrue((s == outp).values.all()) - self.assertTrue(s.name == outp.name) + check(s,outp) dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"), numpy=True)) outp = Series(**dec) - self.assertTrue((s == outp).values.all()) - self.assertTrue(s.name == outp.name) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="records"), numpy=True)) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="records"))) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="values"), numpy=True)) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="values"))) - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="index"))) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) outp = Series(ujson.decode(ujson.encode(s, orient="index"), numpy=True)) outp.sort() - self.assertTrue((s == outp).values.all()) + check(s,outp) def testSeriesNested(self): s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index daa76689621fa..b54c832d56943 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2379,6 +2379,12 @@ def test_align_eq(self): assert_series_equal(result1,expected) 
assert_series_equal(result2,expected) + s1 = Series([10,20,30,40,50,60],index=[6,7,8,9,10,15],name='series') + s2 = Series([10,20,30,40,50,60],index=[6,7,8,9,10,15]) + result = s1 == s2 + expected = Series(True,index=[6,7,8,9,10,15]) + assert_series_equal(result,expected) + def test_operators_empty_int_corner(self): s1 = Series([], [], dtype=np.int32) s2 = Series({'x': 0.}) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8abbb37646b49..07484c60d54a8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -511,15 +511,15 @@ def assert_series_equal(left, right, check_dtype=True, right.values)) else: assert_almost_equal(left.values, right.values, check_less_precise) - if check_less_precise: - assert_almost_equal( - left.index.values, right.index.values, check_less_precise) - else: - assert_index_equal(left.index, right.index) if check_index_type: - assert_isinstance(left.index, type(right.index)) - assert_attr_equal('dtype', left.index, right.index) - assert_attr_equal('inferred_type', left.index, right.index) + if check_less_precise: + assert_almost_equal( + left.index.values, right.index.values, check_less_precise) + else: + assert_index_equal(left.index, right.index) + assert_isinstance(left.index, type(right.index)) + assert_attr_equal('dtype', left.index, right.index) + assert_attr_equal('inferred_type', left.index, right.index) # This could be refactored to use the NDFrame.equals method def assert_frame_equal(left, right, check_dtype=True,