Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: pivot_table sometimes returns Series (#4386) #14534

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ Bug Fixes
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
is not scalar and ``values`` is not specified (:issue:`14380`)
is not scalar and ``values`` is not specified (:issue:`14380`)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,4 @@ Performance Improvements

Bug Fixes
~~~~~~~~~
- Bug in ``pd.pivot_table`` where a ``Series`` is returned instead of a ``DataFrame`` (:issue:`4386`)
6 changes: 3 additions & 3 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3120,9 +3120,9 @@ def test_pivot_table(self):
[Categorical(["a", "b", "z"], ordered=True),
Categorical(["c", "d", "y"], ordered=True)],
names=['A', 'B'])
expected = Series([1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan],
index=exp_index, name='values')
tm.assert_series_equal(result, expected)
exp_data = [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan]
expected = DataFrame(exp_data, index=exp_index, columns=['values'])
tm.assert_frame_equal(result, expected)

def test_count(self):

Expand Down
7 changes: 5 additions & 2 deletions pandas/tools/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
"""
index = _convert_by(index)
columns = _convert_by(columns)

if isinstance(aggfunc, list):
pieces = []
keys = []
Expand Down Expand Up @@ -164,12 +163,16 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
margins_name=margins_name)

# discard the top level
if values_passed and not values_multi and not table.empty:
if (values_passed and not values_multi and not table.empty and
table.columns.nlevels > 1):
table = table[values[0]]

if len(index) == 0 and len(columns) > 0:
table = table.T

if isinstance(table, Series):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you see if you can find the actual root cause here?

e.g. it may be as simple as making sure that the column/index selectors are list-like and not scalars

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jreback. I found the root cause (here). I'll work on this and the tests tonight.

Once fixed, do you still want to keep the isinstance(table, Series) check or should I drop it?

Copy link
Contributor

@jreback jreback Nov 17, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think you can simply always make aggfunc a list (if it's neither None nor list-like) and this will just work. Use is_list_like to test.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your solution works for the specific scenario in #4386, but a Series is still returned regardless when pivoting with only the columns argument, or when pivoting with no values or cols passed, due to the groupby and unstack operations respectively. In these cases, I think to_frame() might be necessary if pivot_table should always return a DataFrame.

Or should I just fix the specific scenario in #4386 and change the docs to mention that pivot_table can return a Series?

table = table.to_frame()

return table


Expand Down
54 changes: 45 additions & 9 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,13 +350,13 @@ def _check_output(result, values_col, index=['A', 'B'],
# no rows
rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True,
aggfunc=np.mean)
tm.assertIsInstance(rtable, Series)

table = self.data.pivot_table(index=['AA', 'BB'], margins=True,
aggfunc='mean')
for item in ['DD', 'EE', 'FF']:
totals = table.loc[('All', ''), item]
self.assertEqual(totals, self.data[item].mean())
expected = self.data.groupby(['AA', 'BB']).mean()
expected.loc[('All', ''), :] = self.data[['DD', 'EE', 'FF']].mean()
expected = (expected.stack()
.unstack(['BB', 'AA'])
.stack(['AA', 'BB'])
.to_frame())
tm.assert_frame_equal(expected, rtable)

# issue number #8349: pivot_table with margins and dictionary aggfunc
data = [
Expand Down Expand Up @@ -485,8 +485,11 @@ def test_margins_no_values_no_cols(self):
# Regression test on pivot table: no values or cols passed.
result = self.data[['A', 'B']].pivot_table(
index=['A', 'B'], aggfunc=len, margins=True)
result_list = result.tolist()
self.assertEqual(sum(result_list[:-1]), result_list[-1])
expected = self.data[['A', 'B']].groupby(['A', 'B']).apply(len)
expected.loc[('All', '')] = expected.sum()
expected = expected.to_frame()

tm.assert_frame_equal(result, expected)

def test_margins_no_values_two_rows(self):
# Regression test on pivot table: no values passed but rows are a
Expand Down Expand Up @@ -854,6 +857,39 @@ def test_categorical_margins(self):
table = data.pivot_table('x', 'y', 'z', margins=True)
tm.assert_frame_equal(table, expected)

def test_always_return_dataframe(self):
# GH 4386
df = DataFrame({'col1': [3, 4, 5],
'col2': ['C', 'D', 'E'],
'col3': [1, 3, 9]})
result = df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum)
m = MultiIndex.from_arrays([[1, 3, 9],
['C', 'D', 'E']],
names=['col3', 'col2'])
expected = DataFrame([3, 4, 5],
index=m, columns=['col1'])

tm.assert_frame_equal(result, expected)

result = df.pivot_table(
'col1', index='col3', columns='col2', aggfunc=np.sum
)
expected = DataFrame([[3, np.NaN, np.NaN],
[np.NaN, 4, np.NaN],
[np.NaN, np.NaN, 5]],
index=Index([1, 3, 9], name='col3'),
columns=Index(['C', 'D', 'E'], name='col2'))

tm.assert_frame_equal(result, expected)

result = df.pivot_table('col1', index='col3', aggfunc=[np.sum])
m = MultiIndex.from_arrays([['sum'],
['col1']])
expected = DataFrame([3, 4, 5],
index=Index([1, 3, 9], name='col3'),
columns=m)
tm.assert_frame_equal(result, expected)


class TestCrosstab(tm.TestCase):

Expand Down