diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index cf81540a77d11..12b9f67ddb846 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -240,13 +240,14 @@ Reshaping, sorting, transposing DataFrame.T DataFrame.transpose -Combining / joining / merging -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Combining / comparing / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ DataFrame.append DataFrame.assign + DataFrame.compare DataFrame.join DataFrame.merge DataFrame.update diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index ab0540a930396..797ade9594c7d 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -240,12 +240,13 @@ Reshaping, sorting Series.squeeze Series.view -Combining / joining / merging ------------------------------ +Combining / comparing / joining / merging +----------------------------------------- .. autosummary:: :toctree: api/ Series.append + Series.compare Series.replace Series.update diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 0450c81958a51..56ff8c1fc7c9b 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -10,15 +10,18 @@ p = doctools.TablePlotter() -**************************** -Merge, join, and concatenate -**************************** +************************************ +Merge, join, concatenate and compare +************************************ pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations. +In addition, pandas also provides utilities to compare two Series or DataFrame +and summarize their differences. + .. _merging.concat: Concatenating objects @@ -1477,3 +1480,61 @@ exclude exact matches on time. Note that though we exclude the exact matches by='ticker', tolerance=pd.Timedelta('10ms'), allow_exact_matches=False) + +.. _merging.compare: + +Comparing objects +----------------- + +The :meth:`~Series.compare` and :meth:`~DataFrame.compare` methods allow you to +compare two DataFrame or Series, respectively, and summarize their differences. + +This feature was added in :ref:`V1.1.0 `. + +For example, you might want to compare two `DataFrame` and stack their differences +side by side. + +.. ipython:: python + + df = pd.DataFrame( + { + "col1": ["a", "a", "b", "b", "a"], + "col2": [1.0, 2.0, 3.0, np.nan, 5.0], + "col3": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + columns=["col1", "col2", "col3"], + ) + df + +.. ipython:: python + + df2 = df.copy() + df2.loc[0, 'col1'] = 'c' + df2.loc[2, 'col3'] = 4.0 + df2 + +.. ipython:: python + + df.compare(df2) + +By default, if two corresponding values are equal, they will be shown as ``NaN``. +Furthermore, if all values in an entire row / column, the row / column will be +omitted from the result. The remaining differences will be aligned on columns. + +If you wish, you may choose to stack the differences on rows. + +.. ipython:: python + + df.compare(df2, align_axis=0) + +If you wish to keep all original rows and columns, set `keep_shape` argument +to ``True``. + +.. ipython:: python + + df.compare(df2, keep_shape=True) + +You may also keep all the original values even if they are equal. + +.. ipython:: python + df.compare(df2, keep_shape=True, keep_equal=True) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a1b7e09fe3ae1..a366afb98cfee 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -55,6 +55,39 @@ For example: ser.loc["May 2015"] +.. _whatsnew_110.dataframe_or_series_comparing: + +Comparing two `DataFrame` or two `Series` and summarizing the differences +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two `DataFrame` or two `Series` (:issue:`30429`) + +.. ipython:: python + + df = pd.DataFrame( + { + "col1": ["a", "a", "b", "b", "a"], + "col2": [1.0, 2.0, 3.0, np.nan, 5.0], + "col3": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + columns=["col1", "col2", "col3"], + ) + df + +.. ipython:: python + + df2 = df.copy() + df2.loc[0, 'col1'] = 'c' + df2.loc[2, 'col3'] = 4.0 + df2 + +.. ipython:: python + + df.compare(df2) + +See :ref:`User Guide ` for more details. + + .. _whatsnew_110.groupby_key: Allow NA in groupby key diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6532d084aa6fa..4911617b8eed4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5755,6 +5755,116 @@ def _construct_result(self, result) -> "DataFrame": out.index = self.index return out + @Appender( + """ +Returns +------- +DataFrame + DataFrame that shows the differences stacked side by side. + + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + +See Also +-------- +Series.compare : Compare with another Series and show differences. + +Notes +----- +Matching NaNs will not appear as a difference. + +Examples +-------- +>>> df = pd.DataFrame( +... { +... "col1": ["a", "a", "b", "b", "a"], +... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], +... "col3": [1.0, 2.0, 3.0, 4.0, 5.0] +... }, +... columns=["col1", "col2", "col3"], +... ) +>>> df + col1 col2 col3 +0 a 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 3.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +>>> df2 = df.copy() +>>> df2.loc[0, 'col1'] = 'c' +>>> df2.loc[2, 'col3'] = 4.0 +>>> df2 + col1 col2 col3 +0 c 1.0 1.0 +1 a 2.0 2.0 +2 b 3.0 4.0 +3 b NaN 4.0 +4 a 5.0 5.0 + +Align the differences on columns + +>>> df.compare(df2) + col1 col3 + self other self other +0 a c NaN NaN +2 NaN NaN 3.0 4.0 + +Stack the differences on rows + +>>> df.compare(df2, align_axis=0) + col1 col3 +0 self a NaN + other c NaN +2 self NaN 3.0 + other NaN 4.0 + +Keep the equal values + +>>> df.compare(df2, keep_equal=True) + col1 col3 + self other self other +0 a c 1.0 1.0 +2 b b 3.0 4.0 + +Keep all original rows and columns + +>>> df.compare(df2, keep_shape=True) + col1 col2 col3 + self other self other self other +0 a c NaN NaN NaN NaN +1 NaN NaN NaN NaN NaN NaN +2 NaN NaN NaN NaN 3.0 4.0 +3 NaN NaN NaN NaN NaN NaN +4 NaN NaN NaN NaN NaN NaN + +Keep all original rows and columns and also all original values + +>>> df.compare(df2, keep_shape=True, keep_equal=True) + col1 col2 col3 + self other self other self other +0 a c 1.0 1.0 1.0 1.0 +1 a a 2.0 2.0 2.0 2.0 +2 b b 3.0 3.0 3.0 4.0 +3 b b NaN NaN 4.0 4.0 +4 a a 5.0 5.0 5.0 5.0 +""" + ) + @Appender(_shared_docs["compare"] % _shared_doc_kwargs) + def compare( + self, + other: "DataFrame", + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + ) -> "DataFrame": + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + ) + def combine( self, other: "DataFrame", func, fill_value=None, overwrite=True ) -> "DataFrame": diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 79805bec85af0..0260f30b9e7e2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8403,6 +8403,104 @@ def ranker(data): return ranker(data) + _shared_docs[ + "compare" + ] = """ + Compare to another %(klass)s and show the differences. + + .. versionadded:: 1.1.0 + + Parameters + ---------- + other : %(klass)s + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + """ + + @Appender(_shared_docs["compare"] % _shared_doc_kwargs) + def compare( + self, + other, + align_axis: Axis = 1, + keep_shape: bool_t = False, + keep_equal: bool_t = False, + ): + from pandas.core.reshape.concat import concat + + if type(self) is not type(other): + cls_self, cls_other = type(self).__name__, type(other).__name__ + raise TypeError( + f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" + ) + + mask = ~((self == other) | (self.isna() & other.isna())) + keys = ["self", "other"] + + if not keep_equal: + self = self.where(mask) + other = other.where(mask) + + if not keep_shape: + if isinstance(self, ABCDataFrame): + cmask = mask.any() + rmask = mask.any(axis=1) + self = self.loc[rmask, cmask] + other = other.loc[rmask, cmask] + else: + self = self[mask] + other = other[mask] + + if align_axis in (1, "columns"): # This is needed for Series + axis = 1 + else: + axis = self._get_axis_number(align_axis) + + diff = concat([self, other], axis=axis, keys=keys) + + if axis >= self.ndim: + # No need to reorganize data if stacking on new axis + # This currently applies for stacking two Series on columns + return diff + + ax = diff._get_axis(axis) + ax_names = np.array(ax.names) + + # set index names to positions to avoid confusion + ax.names = np.arange(len(ax_names)) + + # bring self-other to inner level + order = list(range(1, ax.nlevels)) + [0] + if isinstance(diff, ABCDataFrame): + diff = diff.reorder_levels(order, axis=axis) + else: + diff = diff.reorder_levels(order) + + # restore the index names in order + diff._get_axis(axis=axis).names = ax_names[order] + + # reorder axis to keep things organized + indices = ( + np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() + ) + diff = diff.take(indices, axis=axis) + + return diff + @doc(**_shared_doc_kwargs) def align( self, diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 28dfaea8ed425..db7e9265ac21d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._typing import FrameOrSeriesUnion, Label +from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -50,7 +50,7 @@ def concat( @overload def concat( - objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]], + objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]], axis=0, join: str = "outer", ignore_index: bool = False, @@ -65,7 +65,7 @@ def concat( def concat( - objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]], + objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]], axis=0, join="outer", ignore_index: bool = False, diff --git a/pandas/core/series.py b/pandas/core/series.py index bc13d5376ec96..ba8bd95cd1749 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -24,7 +24,15 @@ from pandas._libs import lib, properties, reshape, tslibs from pandas._libs.lib import no_default -from pandas._typing import ArrayLike, Axis, DtypeObj, IndexKeyFunc, Label, ValueKeyFunc +from pandas._typing import ( + ArrayLike, + Axis, + DtypeObj, + FrameOrSeriesUnion, + IndexKeyFunc, + Label, + ValueKeyFunc, +) from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, doc from pandas.util._validators import validate_bool_kwarg, validate_percentile @@ -2676,6 +2684,83 @@ def _construct_result( out.name = name return out + @Appender( + """ +Returns +------- +Series or DataFrame + If axis is 0 or 'index' the result will be a Series. + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + + If axis is 1 or 'columns' the result will be a DataFrame. + It will have two columns namely 'self' and 'other'. + +See Also +-------- +DataFrame.compare : Compare with another DataFrame and show differences. + +Notes +----- +Matching NaNs will not appear as a difference. + +Examples +-------- +>>> s1 = pd.Series(["a", "b", "c", "d", "e"]) +>>> s2 = pd.Series(["a", "a", "c", "b", "e"]) + +Align the differences on columns + +>>> s1.compare(s2) + self other +1 b a +3 d b + +Stack the differences on indices + +>>> s1.compare(s2, align_axis=0) +1 self b + other a +3 self d + other b +dtype: object + +Keep all original rows + +>>> s1.compare(s2, keep_shape=True) + self other +0 NaN NaN +1 b a +2 NaN NaN +3 d b +4 NaN NaN + +Keep all original rows and also all original values + +>>> s1.compare(s2, keep_shape=True, keep_equal=True) + self other +0 a a +1 b a +2 c c +3 d b +4 e e +""" + ) + @Appender(generic._shared_docs["compare"] % _shared_doc_kwargs) + def compare( + self, + other: "Series", + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + ) -> FrameOrSeriesUnion: + return super().compare( + other=other, + align_axis=align_axis, + keep_shape=keep_shape, + keep_equal=keep_equal, + ) + def combine(self, other, func, fill_value=None) -> "Series": """ Combine the Series with a Series or scalar according to `func`. diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py new file mode 100644 index 0000000000000..468811eba0d39 --- /dev/null +++ b/pandas/tests/frame/methods/test_compare.py @@ -0,0 +1,182 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) +def test_compare_axis(align_axis): + # GH#30429 + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis in (1, "columns"): + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]], + index=indices, + columns=columns, + ) + else: + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + columns = pd.Index(["col1", "col3"]) + expected = pd.DataFrame( + [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keep_shape, keep_equal", + [ + (True, False), + (False, True), + (True, True), + # False, False case is already covered in test_compare_axis + ], +) +def test_compare_various_formats(keep_shape, keep_equal): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal) + + if keep_shape: + indices = pd.Index([0, 1, 2]) + columns = pd.MultiIndex.from_product( + [["col1", "col2", "col3"], ["self", "other"]] + ) + if keep_equal: + expected = pd.DataFrame( + [ + ["a", "c", 1.0, 1.0, 1.0, 1.0], + ["b", "b", 2.0, 2.0, 2.0, 2.0], + ["c", "c", np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + expected = pd.DataFrame( + [ + ["a", "c", np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 3.0, 4.0], + ], + index=indices, + columns=columns, + ) + else: + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_equal_nulls(): + # We want to make sure two NaNs are considered the same + # and dropped where applicable + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + + result = df.compare(df2) + indices = pd.Index([0]) + columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]]) + expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_non_equal_nulls(): + # We want to make sure the relevant NaNs do not get dropped + # even if the entire row or column are NaNs + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = np.nan + + result = df.compare(df2) + + indices = pd.Index([0, 2]) + columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]]) + expected = pd.DataFrame( + [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan]], + index=indices, + columns=columns, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("align_axis", [0, 1]) +def test_compare_multi_index(align_axis): + df = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]} + ) + df.columns = pd.MultiIndex.from_arrays([["a", "a", "b"], ["col1", "col2", "col3"]]) + df.index = pd.MultiIndex.from_arrays([["x", "x", "y"], [0, 1, 2]]) + + df2 = df.copy() + df2.iloc[0, 0] = "c" + df2.iloc[2, 2] = 4.0 + + result = df.compare(df2, align_axis=align_axis) + + if align_axis == 0: + indices = pd.MultiIndex.from_arrays( + [["x", "x", "y", "y"], [0, 0, 2, 2], ["self", "other", "self", "other"]] + ) + columns = pd.MultiIndex.from_arrays([["a", "b"], ["col1", "col3"]]) + data = [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]] + else: + indices = pd.MultiIndex.from_arrays([["x", "y"], [0, 2]]) + columns = pd.MultiIndex.from_arrays( + [ + ["a", "a", "b", "b"], + ["col1", "col1", "col3", "col3"], + ["self", "other", "self", "other"], + ] + ) + data = [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]] + + expected = pd.DataFrame(data=data, index=indices, columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_compare_unaligned_objects(): + # test DataFrames with different indices + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) + df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) + df1.compare(df2) + + # test DataFrames with different shapes + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 = pd.DataFrame(np.ones((3, 3))) + df2 = pd.DataFrame(np.zeros((2, 1))) + df1.compare(df2) diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py new file mode 100644 index 0000000000000..8570800048898 --- /dev/null +++ b/pandas/tests/series/methods/test_compare.py @@ -0,0 +1,116 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"]) +def test_compare_axis(align_axis): + # GH#30429 + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", "z"]) + + result = s1.compare(s2, align_axis=align_axis) + + if align_axis in (1, "columns"): + indices = pd.Index([0, 2]) + columns = pd.Index(["self", "other"]) + expected = pd.DataFrame( + [["a", "x"], ["c", "z"]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + else: + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + expected = pd.Series(["a", "x", "c", "z"], index=indices) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "keep_shape, keep_equal", + [ + (True, False), + (False, True), + (True, True), + # False, False case is already covered in test_compare_axis + ], +) +def test_compare_various_formats(keep_shape, keep_equal): + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", "z"]) + + result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal) + + if keep_shape: + indices = pd.Index([0, 1, 2]) + columns = pd.Index(["self", "other"]) + if keep_equal: + expected = pd.DataFrame( + [["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns + ) + else: + expected = pd.DataFrame( + [["a", "x"], [np.nan, np.nan], ["c", "z"]], + index=indices, + columns=columns, + ) + else: + indices = pd.Index([0, 2]) + columns = pd.Index(["self", "other"]) + expected = pd.DataFrame( + [["a", "x"], ["c", "z"]], index=indices, columns=columns + ) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_equal_nulls(): + # We want to make sure two NaNs are considered the same + # and dropped where applicable + s1 = pd.Series(["a", "b", np.nan]) + s2 = pd.Series(["x", "b", np.nan]) + + result = s1.compare(s2) + expected = pd.DataFrame([["a", "x"]], columns=["self", "other"]) + tm.assert_frame_equal(result, expected) + + +def test_compare_with_non_equal_nulls(): + # We want to make sure the relevant NaNs do not get dropped + s1 = pd.Series(["a", "b", "c"]) + s2 = pd.Series(["x", "b", np.nan]) + + result = s1.compare(s2, align_axis=0) + + indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]]) + expected = pd.Series(["a", "x", "c", np.nan], index=indices) + tm.assert_series_equal(result, expected) + + +def test_compare_multi_index(): + index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]]) + s1 = pd.Series(["a", "b", "c"], index=index) + s2 = pd.Series(["x", "b", "z"], index=index) + + result = s1.compare(s2, align_axis=0) + + indices = pd.MultiIndex.from_arrays( + [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]] + ) + expected = pd.Series(["a", "x", "c", "z"], index=indices) + tm.assert_series_equal(result, expected) + + +def test_compare_unaligned_objects(): + # test Series with different indices + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"]) + ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"]) + ser1.compare(ser2) + + # test Series with different lengths + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + ser1 = pd.Series([1, 2, 3]) + ser2 = pd.Series([1, 2, 3, 4]) + ser1.compare(ser2)