Skip to content

Commit 421dcf4

Browse files
chrisjbillington authored and jowens committed
Bugfix for multilevel columns with empty strings in Python 2 (pandas-dev#17099)
1 parent 7280e6c commit 421dcf4

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ Indexing
324324
- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
325325
- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
326326
- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`)
327+
- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
327328

328329
I/O
329330
^^^

Diff for: pandas/core/frame.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2134,10 +2134,18 @@ def _getitem_multilevel(self, key):
21342134
result = self._constructor(new_values, index=self.index,
21352135
columns=result_columns)
21362136
result = result.__finalize__(self)
2137+
2138+
# If there is only one column being returned, and its name is
2139+
# either an empty string, or a tuple with an empty string as its
2140+
# first element, then treat the empty string as a placeholder
2141+
# and return the column as if the user had provided that empty
2142+
# string in the key. If the result is a Series, exclude the
2143+
# implied empty string from its name.
21372144
if len(result.columns) == 1:
21382145
top = result.columns[0]
2139-
if ((type(top) == str and top == '') or
2140-
(type(top) == tuple and top[0] == '')):
2146+
if isinstance(top, tuple):
2147+
top = top[0]
2148+
if top == '':
21412149
result = result['']
21422150
if isinstance(result, Series):
21432151
result = self._constructor_sliced(result,

Diff for: pandas/tests/test_multilevel.py

+14-7
Original file line numberDiff line numberDiff line change
@@ -1675,24 +1675,31 @@ def test_int_series_slicing(self):
16751675
expected = self.ymd.reindex(s.index[5:])
16761676
tm.assert_frame_equal(result, expected)
16771677

1678-
def test_mixed_depth_get(self):
1678+
@pytest.mark.parametrize('unicode_strings', [True, False])
1679+
def test_mixed_depth_get(self, unicode_strings):
1680+
# If unicode_strings is True, the column labels in dataframe
1681+
# construction will use unicode strings in Python 2 (pull request
1682+
# #17099).
1683+
16791684
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
16801685
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
16811686
['', 'wx', 'wy', '', '', '']]
16821687

1688+
if unicode_strings:
1689+
arrays = [[u(s) for s in arr] for arr in arrays]
1690+
16831691
tuples = sorted(zip(*arrays))
16841692
index = MultiIndex.from_tuples(tuples)
1685-
df = DataFrame(randn(4, 6), columns=index)
1693+
df = DataFrame(np.random.randn(4, 6), columns=index)
16861694

16871695
result = df['a']
1688-
expected = df['a', '', '']
1689-
tm.assert_series_equal(result, expected, check_names=False)
1690-
assert result.name == 'a'
1696+
expected = df['a', '', ''].rename('a')
1697+
tm.assert_series_equal(result, expected)
16911698

16921699
result = df['routine1', 'result1']
16931700
expected = df['routine1', 'result1', '']
1694-
tm.assert_series_equal(result, expected, check_names=False)
1695-
assert result.name == ('routine1', 'result1')
1701+
expected = expected.rename(('routine1', 'result1'))
1702+
tm.assert_series_equal(result, expected)
16961703

16971704
def test_mixed_depth_insert(self):
16981705
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],

0 commit comments

Comments
 (0)