diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 38b7a1d13c253..cbf0d4a4d708b 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -260,6 +260,7 @@ MultiIndex
 ^^^^^^^^^^
 
 - Bug in :meth:`DataFrame.drop` raising ``TypeError`` when :class:`MultiIndex` is non-unique and no level is provided (:issue:`36293`)
+- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`)
 - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differntly ordered (:issue:`38439`)
 - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 61b6b7ff19edc..f058645c4abda 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -3578,16 +3578,9 @@ def _intersection(self, other, sort=False):
                 uniq_tuples = algos.unique(inner_tuples)
 
         if uniq_tuples is None:
-            other_uniq = set(rvals)
-            seen = set()
-            # pandas\core\indexes\multi.py:3503: error: "add" of "set" does not
-            # return a value [func-returns-value]
-            uniq_tuples = [
-                x
-                for x in lvals
-                if x in other_uniq
-                and not (x in seen or seen.add(x))  # type: ignore[func-returns-value]
-            ]
+            left_unique = self.drop_duplicates()
+            indexer = left_unique.get_indexer(other.drop_duplicates())
+            uniq_tuples = left_unique.take(np.sort(indexer[indexer != -1]))
 
         if sort is None:
             uniq_tuples = sorted(uniq_tuples)
diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
index d5b29527ee08e..f872315374174 100644
--- a/pandas/tests/indexes/multi/test_setops.py
+++ b/pandas/tests/indexes/multi/test_setops.py
@@ -483,3 +483,12 @@ def test_intersection_different_names():
     mi2 = MultiIndex.from_arrays([[1], [3]])
     result = mi.intersection(mi2)
     tm.assert_index_equal(result, mi2)
+
+
+def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
+    # GH#38623
+    mi1 = MultiIndex.from_arrays([[3, nulls_fixture, 4, nulls_fixture], [1, 2, 4, 2]])
+    mi2 = MultiIndex.from_arrays([[3, nulls_fixture, 3], [1, 2, 4]])
+    result = mi1.intersection(mi2)
+    expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]])
+    tm.assert_index_equal(result, expected)