Skip to content

Commit c30626e

Browse files
BUG: pivot_table mean of integer input casted back to int (#54263)
1 parent 42e489c commit c30626e

File tree

5 files changed

+42
-62
lines changed

5 files changed

+42
-62
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,7 @@ Other
787787
- Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`)
788788
- Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
789789
- Bug in :func:`assert_frame_equal` checking category dtypes even when asked not to check index type (:issue:`52126`)
790+
- Bug in :meth:`DataFrame.pivot_table` with casting the mean of ints back to an int (:issue:`16676`)
790791
- Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
791792
- Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`)
792793
- Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`)
@@ -798,7 +799,6 @@ Other
798799
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throwing an error with a Series of objects and returning an incorrect value, as it did not take into account GC corrections (:issue:`51858`)
799800
- Bug in :meth:`period_range` where the default behavior when ``freq`` was not passed as an argument was incorrect (:issue:`53687`)
800801
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
801-
-
802802

803803
.. ***DO NOT USE THIS SECTION***
804804

pandas/core/reshape/pivot.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
2222
from pandas.core.dtypes.common import (
23-
is_integer_dtype,
2423
is_list_like,
2524
is_nested_list_like,
2625
is_scalar,
@@ -172,28 +171,6 @@ def __internal_pivot_table(
172171
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
173172
agged = agged.dropna(how="all")
174173

175-
# gh-21133
176-
# we want to down cast if
177-
# the original values are ints
178-
# as we grouped with a NaN value
179-
# and then dropped, coercing to floats
180-
for v in values:
181-
if (
182-
v in data
183-
and is_integer_dtype(data[v])
184-
and v in agged
185-
and not is_integer_dtype(agged[v])
186-
):
187-
if not isinstance(agged[v], ABCDataFrame) and isinstance(
188-
data[v].dtype, np.dtype
189-
):
190-
# exclude DataFrame case bc maybe_downcast_to_dtype expects
191-
# ArrayLike
192-
# e.g. test_pivot_table_multiindex_columns_doctest_case
193-
# agged.columns is a MultiIndex and 'v' is indexing only
194-
# on its first level.
195-
agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
196-
197174
table = agged
198175

199176
# GH17038, this check should only happen if index is defined (not None)

pandas/tests/frame/methods/test_drop.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,7 @@ def test_drop_multiindex_not_lexsorted(self):
187187
not_lexsorted_df = not_lexsorted_df.reset_index()
188188
assert not not_lexsorted_df.columns._is_lexsorted()
189189

190-
# compare the results
191-
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
192-
193-
expected = lexsorted_df.drop("a", axis=1)
190+
expected = lexsorted_df.drop("a", axis=1).astype(float)
194191
with tm.assert_produces_warning(PerformanceWarning):
195192
result = not_lexsorted_df.drop("a", axis=1)
196193

pandas/tests/groupby/test_groupby.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,9 +1818,6 @@ def test_groupby_multiindex_not_lexsorted():
18181818
not_lexsorted_df = not_lexsorted_df.reset_index()
18191819
assert not not_lexsorted_df.columns._is_lexsorted()
18201820

1821-
# compare the results
1822-
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
1823-
18241821
expected = lexsorted_df.groupby("a").mean()
18251822
with tm.assert_produces_warning(PerformanceWarning):
18261823
result = not_lexsorted_df.groupby("a").mean()

pandas/tests/reshape/test_pivot.py

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def test_pivot_table_categorical(self):
204204
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
205205

206206
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
207-
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
207+
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
208208
tm.assert_frame_equal(result, expected)
209209

210210
def test_pivot_table_dropna_categoricals(self, dropna):
@@ -225,7 +225,7 @@ def test_pivot_table_dropna_categoricals(self, dropna):
225225
expected_columns = expected_columns.astype(CDT(categories, ordered=False))
226226
expected_index = Series([1, 2, 3], name="B")
227227
expected = DataFrame(
228-
[[0, 3, 6], [1, 4, 7], [2, 5, 8]],
228+
[[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]],
229229
index=expected_index,
230230
columns=expected_columns,
231231
)
@@ -283,7 +283,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
283283

284284
result = df.pivot_table(index="A", values="B", dropna=dropna)
285285
expected = DataFrame(
286-
{"B": [2, 3, 0]},
286+
{"B": [2.0, 3.0, 0.0]},
287287
index=Index(
288288
Categorical.from_codes(
289289
[0, 1, 2], categories=["low", "high", "left"], ordered=True
@@ -300,7 +300,9 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
300300
# GH 25814
301301
df = DataFrame({"A": interval_values, "B": 1})
302302
result = df.pivot_table(index="A", values="B", dropna=dropna)
303-
expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
303+
expected = DataFrame(
304+
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
305+
)
304306
if not dropna:
305307
expected = expected.astype(float)
306308
tm.assert_frame_equal(result, expected)
@@ -444,7 +446,7 @@ def test_pivot_no_values(self):
444446
index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M")
445447
)
446448
exp = DataFrame(
447-
[3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
449+
[3.0], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
448450
)
449451
tm.assert_frame_equal(res, exp)
450452

@@ -1059,7 +1061,7 @@ def test_pivot_table_multiindex_only(self, cols):
10591061

10601062
result = df2.pivot_table(values="v", columns=cols)
10611063
expected = DataFrame(
1062-
[[4, 5, 6]],
1064+
[[4.0, 5.0, 6.0]],
10631065
columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
10641066
index=Index(["v"]),
10651067
)
@@ -1564,7 +1566,9 @@ def test_pivot_datetime_tz(self):
15641566
exp_col1 = Index(["value1", "value1"])
15651567
exp_col2 = Index(["a", "b"], name="label")
15661568
exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
1567-
expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col)
1569+
expected = DataFrame(
1570+
[[0.0, 3.0], [1.0, 4.0], [2.0, 5.0]], index=exp_idx, columns=exp_col
1571+
)
15681572
result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"])
15691573
tm.assert_frame_equal(result, expected)
15701574

@@ -1576,18 +1580,35 @@ def test_pivot_datetime_tz(self):
15761580
name="dt2",
15771581
)
15781582
exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
1579-
expected = DataFrame(
1583+
expected1 = DataFrame(
15801584
np.array(
15811585
[
1582-
[0, 3, 1, 2, 0, 3, 1, 2],
1583-
[1, 4, 2, 1, 1, 4, 2, 1],
1584-
[2, 5, 1, 2, 2, 5, 1, 2],
1586+
[
1587+
0,
1588+
3,
1589+
1,
1590+
2,
1591+
],
1592+
[1, 4, 2, 1],
1593+
[2, 5, 1, 2],
15851594
],
15861595
dtype="int64",
15871596
),
15881597
index=exp_idx,
1589-
columns=exp_col,
1598+
columns=exp_col[:4],
1599+
)
1600+
expected2 = DataFrame(
1601+
np.array(
1602+
[
1603+
[0.0, 3.0, 1.0, 2.0],
1604+
[1.0, 4.0, 2.0, 1.0],
1605+
[2.0, 5.0, 1.0, 2.0],
1606+
],
1607+
),
1608+
index=exp_idx,
1609+
columns=exp_col[4:],
15901610
)
1611+
expected = concat([expected1, expected2], axis=1)
15911612

15921613
result = pivot_table(
15931614
df,
@@ -1634,7 +1655,7 @@ def test_pivot_dtaccessor(self):
16341655

16351656
exp_idx = Index(["a", "b"], name="label")
16361657
expected = DataFrame(
1637-
{7: [0, 3], 8: [1, 4], 9: [2, 5]},
1658+
{7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
16381659
index=exp_idx,
16391660
columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
16401661
)
@@ -1645,7 +1666,7 @@ def test_pivot_dtaccessor(self):
16451666
)
16461667

16471668
expected = DataFrame(
1648-
{7: [0, 3], 8: [1, 4], 9: [2, 5]},
1669+
{7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
16491670
index=Index([1, 2], dtype=np.int32, name="dt2"),
16501671
columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
16511672
)
@@ -1666,7 +1687,7 @@ def test_pivot_dtaccessor(self):
16661687
names=["dt1", "dt2"],
16671688
)
16681689
expected = DataFrame(
1669-
np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"),
1690+
np.array([[0.0, 3.0, 1.0, 4.0, 2.0, 5.0]]),
16701691
index=Index([2013], dtype=np.int32),
16711692
columns=exp_col,
16721693
)
@@ -1770,13 +1791,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
17701791
expected = DataFrame(table.values, index=ix, columns=cols)
17711792
tm.assert_frame_equal(table, expected)
17721793

1773-
def test_categorical_margins(self, observed, request):
1774-
if observed:
1775-
request.node.add_marker(
1776-
pytest.mark.xfail(
1777-
reason="GH#17035 (np.mean of ints is casted back to ints)"
1778-
)
1779-
)
1794+
def test_categorical_margins(self, observed):
17801795
# GH 10989
17811796
df = DataFrame(
17821797
{"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
@@ -1789,13 +1804,7 @@ def test_categorical_margins(self, observed, request):
17891804
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
17901805
tm.assert_frame_equal(table, expected)
17911806

1792-
def test_categorical_margins_category(self, observed, request):
1793-
if observed:
1794-
request.node.add_marker(
1795-
pytest.mark.xfail(
1796-
reason="GH#17035 (np.mean of ints is casted back to ints)"
1797-
)
1798-
)
1807+
def test_categorical_margins_category(self, observed):
17991808
df = DataFrame(
18001809
{"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
18011810
)
@@ -1822,7 +1831,7 @@ def test_margins_casted_to_float(self):
18221831

18231832
result = pivot_table(df, index="D", margins=True)
18241833
expected = DataFrame(
1825-
{"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]},
1834+
{"A": [3.0, 7.0, 5], "B": [2.5, 6.5, 4.5], "C": [2.0, 5.0, 3.5]},
18261835
index=Index(["X", "Y", "All"], name="D"),
18271836
)
18281837
tm.assert_frame_equal(result, expected)
@@ -2255,7 +2264,7 @@ def test_pivot_table_sort_false_with_multiple_values(self):
22552264
index=["lastname", "firstname"], values=["height", "age"], sort=False
22562265
)
22572266
expected = DataFrame(
2258-
[[173, 47], [182, 33]],
2267+
[[173.0, 47.0], [182.0, 33.0]],
22592268
columns=["height", "age"],
22602269
index=MultiIndex.from_tuples(
22612270
[("Foo", "John"), ("Bar", "Michael")],

0 commit comments

Comments
 (0)