From ed04a29e380bb2b257e827f54817b93e313b7f4b Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Tue, 15 Sep 2020 17:35:40 -0500 Subject: [PATCH] Backport PR #36371: BUG: Fix MultiIndex column stacking with dupe names --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/reshape/reshape.py | 14 +++++--------- pandas/tests/frame/test_reshape.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7f5340b7022ce..0ffa86def4376 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -30,6 +30,7 @@ Bug fixes - Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) +- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 391313fbb5283..1d4c9a7826178 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -588,19 +588,15 @@ def _stack_multi_columns(frame, level_num=-1, dropna=True): def _convert_level_number(level_num, columns): """ Logic for converting the level number to something we can safely pass - to swaplevel: + to swaplevel. 
- We generally want to convert the level number into a level name, except - when columns do not have names, in which case we must leave as a level - number + If `level_num` is also one of the column level names, return the name + found at position `level_num`; otherwise return `level_num` unchanged. """ if level_num in columns.names: return columns.names[level_num] - else: - if columns.names[level_num] is None: - return level_num - else: - return columns.names[level_num] + + return level_num this = frame.copy() diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 6a8f1e7c1aca2..1b452658cc219 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1302,3 +1302,16 @@ def test_unstacking_multi_index_df(): ), ) tm.assert_frame_equal(result, expected) + + +def test_stack_positional_level_duplicate_column_names(): + # https://github.com/pandas-dev/pandas/issues/36353 + columns = pd.MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"]) + df = pd.DataFrame([[1, 1, 1, 1]], columns=columns) + result = df.stack(0) + + new_columns = pd.Index(["y", "z"], name="a") + new_index = pd.MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + expected = pd.DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) + + tm.assert_frame_equal(result, expected)