From 02ef0a8108b04019e8d9ed64855a147677cca1ea Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 28 Oct 2016 23:42:45 -0700 Subject: [PATCH] BUG: pivot_table sometimes returns Series (#4386) BUG: pivot_table sometimes returns Series (#4386) BUG: pivot_table sometimes returns Series (#4386) BUG: pivot_table sometimes returns Series (#4386) pep 8 fixes Restructure conditional and update whatsnew --- doc/source/whatsnew/v0.19.1.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/test_categorical.py | 6 ++-- pandas/tools/pivot.py | 7 +++-- pandas/tools/tests/test_pivot.py | 54 ++++++++++++++++++++++++++------ 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index db5bd22393e64..545b4380d9b75 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -58,4 +58,4 @@ Bug Fixes - Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue`14327`) - Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` - is not scalar and ``values`` is not specified (:issue:`14380`) \ No newline at end of file + is not scalar and ``values`` is not specified (:issue:`14380`) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7fa9991138fba..696911a7e96b4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -80,3 +80,4 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``pd.pivot_table`` where a ``Series`` is returned instead of a ``DataFrame`` (:issue:`4386`) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index f01fff035a3c5..1638d062a7a18 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3120,9 +3120,9 @@ 
def test_pivot_table(self): [Categorical(["a", "b", "z"], ordered=True), Categorical(["c", "d", "y"], ordered=True)], names=['A', 'B']) - expected = Series([1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan], - index=exp_index, name='values') - tm.assert_series_equal(result, expected) + exp_data = [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan] + expected = DataFrame(exp_data, index=exp_index, columns=['values']) + tm.assert_frame_equal(result, expected) def test_count(self): diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 9e064a1d1fc99..d2dffaec4dd9c 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -78,7 +78,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', """ index = _convert_by(index) columns = _convert_by(columns) - if isinstance(aggfunc, list): pieces = [] keys = [] @@ -164,12 +163,16 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', margins_name=margins_name) # discard the top level - if values_passed and not values_multi and not table.empty: + if (values_passed and not values_multi and not table.empty and + table.columns.nlevels > 1): table = table[values[0]] if len(index) == 0 and len(columns) > 0: table = table.T + if isinstance(table, Series): + table = table.to_frame() + return table diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 5944fa1b34611..a23dbd7e25649 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -350,13 +350,13 @@ def _check_output(result, values_col, index=['A', 'B'], # no rows rtable = self.data.pivot_table(columns=['AA', 'BB'], margins=True, aggfunc=np.mean) - tm.assertIsInstance(rtable, Series) - - table = self.data.pivot_table(index=['AA', 'BB'], margins=True, - aggfunc='mean') - for item in ['DD', 'EE', 'FF']: - totals = table.loc[('All', ''), item] - self.assertEqual(totals, self.data[item].mean()) + expected = self.data.groupby(['AA', 'BB']).mean() + 
expected.loc[('All', ''), :] = self.data[['DD', 'EE', 'FF']].mean() + expected = (expected.stack() + .unstack(['BB', 'AA']) + .stack(['AA', 'BB']) + .to_frame()) + tm.assert_frame_equal(expected, rtable) # issue number #8349: pivot_table with margins and dictionary aggfunc data = [ @@ -485,8 +485,11 @@ def test_margins_no_values_no_cols(self): # Regression test on pivot table: no values or cols passed. result = self.data[['A', 'B']].pivot_table( index=['A', 'B'], aggfunc=len, margins=True) - result_list = result.tolist() - self.assertEqual(sum(result_list[:-1]), result_list[-1]) + expected = self.data[['A', 'B']].groupby(['A', 'B']).apply(len) + expected.loc[('All', '')] = expected.sum() + expected = expected.to_frame() + + tm.assert_frame_equal(result, expected) def test_margins_no_values_two_rows(self): # Regression test on pivot table: no values passed but rows are a @@ -854,6 +857,39 @@ def test_categorical_margins(self): table = data.pivot_table('x', 'y', 'z', margins=True) tm.assert_frame_equal(table, expected) + def test_always_return_dataframe(self): + # GH 4386 + df = DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + result = df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + m = MultiIndex.from_arrays([[1, 3, 9], + ['C', 'D', 'E']], + names=['col3', 'col2']) + expected = DataFrame([3, 4, 5], + index=m, columns=['col1']) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table( + 'col1', index='col3', columns='col2', aggfunc=np.sum + ) + expected = DataFrame([[3, np.NaN, np.NaN], + [np.NaN, 4, np.NaN], + [np.NaN, np.NaN, 5]], + index=Index([1, 3, 9], name='col3'), + columns=Index(['C', 'D', 'E'], name='col2')) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table('col1', index='col3', aggfunc=[np.sum]) + m = MultiIndex.from_arrays([['sum'], + ['col1']]) + expected = DataFrame([3, 4, 5], + index=Index([1, 3, 9], name='col3'), + columns=m) + tm.assert_frame_equal(result, expected) 
+ class TestCrosstab(tm.TestCase):