Skip to content

Commit

Permalink
TST: add validation checks on levels keyword from pd.concat (#46654)
Browse files Browse the repository at this point in the history
  • Loading branch information
GYHHAHA authored Apr 7, 2022
1 parent f3b8439 commit 361021b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ Other enhancements
- :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`)
- Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`)
- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
- :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
- :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_150.notable_bug_fixes:
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,8 @@ def _get_concat_axis(self) -> Index:
return idx

if self.keys is None:
if self.levels is not None:
raise ValueError("levels supported only when keys is not None")
concat_axis = _concat_indexes(indexes)
else:
concat_axis = _make_concat_multiindex(
Expand Down Expand Up @@ -712,6 +714,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
else:
levels = [ensure_index(x) for x in levels]

for level in levels:
if not level.is_unique:
raise ValueError(f"Level values not unique: {level.tolist()}")

if not all_indexes_same(indexes) or not all(level.is_unique for level in levels):
codes_list = []

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/reshape/concat/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,19 @@ def test_concat_with_key_not_unique(self):
out_b = df_b.loc[("x", 0), :]

tm.assert_frame_equal(out_a, out_b)

def test_concat_with_duplicated_levels(self):
    # GH 46653: each entry passed through ``levels`` must hold unique
    # values; a repeated label ("y" here) has to be rejected with an
    # error that echoes the offending level.
    frames = [DataFrame({"A": [1]}, index=[label]) for label in ("x", "y")]
    expected_msg = r"Level values not unique: \['x', 'y', 'y'\]"
    with pytest.raises(ValueError, match=expected_msg):
        concat(frames, keys=["x", "y"], levels=[["x", "y", "y"]])

@pytest.mark.parametrize("levels", [[["x", "y"]], [["x", "y", "y"]]])
def test_concat_with_levels_with_none_keys(self, levels):
    # GH 46653: supplying ``levels`` without ``keys`` must raise. The
    # parametrization covers both a unique and a duplicated level so the
    # missing-keys error is shown to fire in either case.
    frames = [DataFrame({"A": [1]}, index=[label]) for label in ("x", "y")]
    expected_msg = "levels supported only when keys is not None"
    with pytest.raises(ValueError, match=expected_msg):
        concat(frames, levels=levels)

0 comments on commit 361021b

Please sign in to comment.