Skip to content

Commit 6165db4

Browse files
toobaz authored and alanbato committed
BUG: Respect dtype when calling pivot_table with margins=True
closes pandas-dev#17013

This fix actually exposed an occurrence of pandas-dev#17035 in an existing test (as well as in one I added).

Author: Pietro Battiston <me@pietrobattiston.it>

Closes pandas-dev#17062 from toobaz/pivot_margin_int and squashes the following commits:

2737600 [Pietro Battiston] Removed now obsolete workaround
956c4f9 [Pietro Battiston] BUG: respect dtype when calling pivot_table with margins=True
1 parent e68c584 commit 6165db4

File tree

3 files changed

+59
-14
lines changed

3 files changed

+59
-14
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ Reshaping
311311
- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`)
312312
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
313313
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
314+
- Fixes the dtype of the result of :func:`pivot_table` for integer dtype input when called with ``margins=True`` (:issue:`17013`)
314315

315316
Numeric
316317
^^^^^^^

Diff for: pandas/core/reshape/pivot.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
178178
data = data[data.notna().all(axis=1)]
179179
table = _add_margins(table, data, values, rows=index,
180180
cols=columns, aggfunc=aggfunc,
181-
margins_name=margins_name)
181+
margins_name=margins_name, fill_value=fill_value)
182182

183183
# discard the top level
184184
if values_passed and not values_multi and not table.empty and \
@@ -199,7 +199,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
199199

200200

201201
def _add_margins(table, data, values, rows, cols, aggfunc,
202-
margins_name='All'):
202+
margins_name='All', fill_value=None):
203203
if not isinstance(margins_name, compat.string_types):
204204
raise ValueError('margins_name argument must be a string')
205205

@@ -240,8 +240,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
240240
if not isinstance(marginal_result_set, tuple):
241241
return marginal_result_set
242242
result, margin_keys, row_margin = marginal_result_set
243-
244-
row_margin = row_margin.reindex(result.columns)
243+
row_margin = row_margin.reindex(result.columns, fill_value=fill_value)
245244
# populate grand margin
246245
for k in margin_keys:
247246
if isinstance(k, compat.string_types):
@@ -253,6 +252,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
253252

254253
row_names = result.index.names
255254
try:
255+
for dtype in set(result.dtypes):
256+
cols = result.select_dtypes([dtype]).columns
257+
margin_dummy[cols] = margin_dummy[cols].astype(dtype)
256258
result = result.append(margin_dummy)
257259
except TypeError:
258260

@@ -524,10 +526,6 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
524526
margins=margins, margins_name=margins_name,
525527
dropna=dropna, **kwargs)
526528

527-
# GH 17013:
528-
if values is None and margins:
529-
table = table.fillna(0).astype(np.int64)
530-
531529
# Post-process
532530
if normalize is not False:
533531
table = _normalize(table, normalize=normalize, margins=margins,

Diff for: pandas/tests/reshape/test_pivot.py

+52-6
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,41 @@ def _check_output(result, values_col, index=['A', 'B'],
459459

460460
tm.assert_frame_equal(result['SALARY'], expected['SALARY'])
461461

462+
def test_margins_dtype(self):
463+
# GH 17013
464+
465+
df = self.data.copy()
466+
df[['D', 'E', 'F']] = np.arange(len(df) * 3).reshape(len(df), 3)
467+
468+
mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')]
469+
mi = MultiIndex.from_tuples(mi_val, names=('A', 'B'))
470+
expected = DataFrame({'dull': [12, 21, 3, 9, 45],
471+
'shiny': [33, 0, 36, 51, 120]},
472+
index=mi).rename_axis('C', axis=1)
473+
expected['All'] = expected['dull'] + expected['shiny']
474+
475+
result = df.pivot_table(values='D', index=['A', 'B'],
476+
columns='C', margins=True,
477+
aggfunc=np.sum, fill_value=0)
478+
479+
tm.assert_frame_equal(expected, result)
480+
481+
@pytest.mark.xfail(reason='GH 17035 (len of floats is casted back to '
482+
'floats)')
483+
def test_margins_dtype_len(self):
484+
mi_val = list(product(['bar', 'foo'], ['one', 'two'])) + [('All', '')]
485+
mi = MultiIndex.from_tuples(mi_val, names=('A', 'B'))
486+
expected = DataFrame({'dull': [1, 1, 2, 1, 5],
487+
'shiny': [2, 0, 2, 2, 6]},
488+
index=mi).rename_axis('C', axis=1)
489+
expected['All'] = expected['dull'] + expected['shiny']
490+
491+
result = self.data.pivot_table(values='D', index=['A', 'B'],
492+
columns='C', margins=True,
493+
aggfunc=len, fill_value=0)
494+
495+
tm.assert_frame_equal(expected, result)
496+
462497
def test_pivot_integer_columns(self):
463498
# caused by upstream bug in unstack
464499

@@ -894,6 +929,8 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
894929
expected = pd.DataFrame(table.values, index=ix, columns=cols)
895930
tm.assert_frame_equal(table, expected)
896931

932+
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
933+
'ints)')
897934
def test_categorical_margins(self):
898935
# GH 10989
899936
df = pd.DataFrame({'x': np.arange(8),
@@ -904,14 +941,23 @@ def test_categorical_margins(self):
904941
expected.index = Index([0, 1, 'All'], name='y')
905942
expected.columns = Index([0, 1, 'All'], name='z')
906943

907-
data = df.copy()
908-
table = data.pivot_table('x', 'y', 'z', margins=True)
944+
table = df.pivot_table('x', 'y', 'z', margins=True)
909945
tm.assert_frame_equal(table, expected)
910946

911-
data = df.copy()
912-
data.y = data.y.astype('category')
913-
data.z = data.z.astype('category')
914-
table = data.pivot_table('x', 'y', 'z', margins=True)
947+
@pytest.mark.xfail(reason='GH 17035 (np.mean of ints is casted back to '
948+
'ints)')
949+
def test_categorical_margins_category(self):
950+
df = pd.DataFrame({'x': np.arange(8),
951+
'y': np.arange(8) // 4,
952+
'z': np.arange(8) % 2})
953+
954+
expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]])
955+
expected.index = Index([0, 1, 'All'], name='y')
956+
expected.columns = Index([0, 1, 'All'], name='z')
957+
958+
df.y = df.y.astype('category')
959+
df.z = df.z.astype('category')
960+
table = df.pivot_table('x', 'y', 'z', margins=True)
915961
tm.assert_frame_equal(table, expected)
916962

917963
def test_categorical_aggfunc(self):

0 commit comments

Comments
 (0)