diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d0dae450735a3..e2aac772a520f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -640,6 +640,7 @@ Other - Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`) - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) +- Bug in :meth:`DataFrame.pivot_table` with casting the mean of ints back to an int (:issue:`16676`) - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) - Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`) - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`) @@ -650,7 +651,6 @@ Other - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) - Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`) - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`) -- .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 099bfde7af1d3..5c2e94735ddc5 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -20,7 +20,6 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - is_integer_dtype, is_list_like, is_nested_list_like, is_scalar, @@ -172,28 +171,6 @@ def __internal_pivot_table( if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): agged = agged.dropna(how="all") - # gh-21133 - # we want to down cast if - # the original values are ints - # as we grouped with a NaN value - # and then dropped, coercing to floats - for v in values: - if ( - v in data - and is_integer_dtype(data[v]) - and v in agged - and not is_integer_dtype(agged[v]) - ): - if not isinstance(agged[v], ABCDataFrame) and isinstance( - data[v].dtype, np.dtype - ): - # exclude DataFrame case bc maybe_downcast_to_dtype expects - # ArrayLike - # e.g. test_pivot_table_multiindex_columns_doctest_case - # agged.columns is a MultiIndex and 'v' is indexing only - # on its first level. - agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype) - table = agged # GH17038, this check should only happen if index is defined (not None) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 79fd48de91ed5..87b23b4ceba14 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -185,10 +185,7 @@ def test_drop_multiindex_not_lexsorted(self): not_lexsorted_df = not_lexsorted_df.reset_index() assert not not_lexsorted_df.columns._is_lexsorted() - # compare the results - tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) - - expected = lexsorted_df.drop("a", axis=1) + expected = lexsorted_df.drop("a", axis=1).astype(float) with tm.assert_produces_warning(PerformanceWarning): result = not_lexsorted_df.drop("a", axis=1) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c8de1cd6785b6..7fda071b6729e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1789,9 +1789,6 @@ def test_groupby_multiindex_not_lexsorted(): not_lexsorted_df = not_lexsorted_df.reset_index() assert not not_lexsorted_df.columns._is_lexsorted() - # compare the results - tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) - expected = lexsorted_df.groupby("a").mean() with tm.assert_produces_warning(PerformanceWarning): result = not_lexsorted_df.groupby("a").mean() diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1e122442cd40c..27d3fdbda1ded 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -204,7 +204,7 @@ def test_pivot_table_categorical(self): result = pivot_table(df, values="values", index=["A", "B"], dropna=True) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) - expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index) tm.assert_frame_equal(result, expected) def test_pivot_table_dropna_categoricals(self, dropna): @@ -225,7 +225,7 @@ def test_pivot_table_dropna_categoricals(self, dropna): expected_columns = expected_columns.astype(CDT(categories, ordered=False)) expected_index = Series([1, 2, 3], name="B") expected = DataFrame( - [[0, 3, 6], [1, 4, 7], [2, 5, 8]], + [[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]], index=expected_index, columns=expected_columns, ) @@ -283,7 +283,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna): result = df.pivot_table(index="A", values="B", dropna=dropna) expected = DataFrame( - {"B": [2, 3, 0]}, + {"B": [2.0, 3.0, 0.0]}, index=Index( Categorical.from_codes( [0, 1, 2], categories=["low", "high", "left"], ordered=True @@ -300,7 +300,9 @@ def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 df = DataFrame({"A": interval_values, "B": 1}) result = df.pivot_table(index="A", values="B", dropna=dropna) - expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A")) + expected = DataFrame( + {"B": 1.0}, index=Index(interval_values.unique(), name="A") + ) if not dropna: expected = expected.astype(float) tm.assert_frame_equal(result, expected) @@ -444,7 +446,7 @@ def test_pivot_no_values(self): index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M") ) exp = DataFrame( - [3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns + [3.0], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns ) tm.assert_frame_equal(res, exp) @@ -1059,7 +1061,7 @@ def test_pivot_table_multiindex_only(self, cols): result = df2.pivot_table(values="v", columns=cols) expected = DataFrame( - [[4, 5, 6]], + [[4.0, 5.0, 6.0]], columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols), index=Index(["v"]), ) @@ -1558,7 +1560,9 @@ def test_pivot_datetime_tz(self): exp_col1 = Index(["value1", "value1"]) exp_col2 = Index(["a", "b"], name="label") exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) - expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col) + expected = DataFrame( + [[0.0, 3.0], [1.0, 4.0], [2.0, 5.0]], index=exp_idx, columns=exp_col + ) result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"]) tm.assert_frame_equal(result, expected) @@ -1570,18 +1574,35 @@ def test_pivot_datetime_tz(self): name="dt2", ) exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) - expected = DataFrame( + expected1 = DataFrame( np.array( [ - [0, 3, 1, 2, 0, 3, 1, 2], - [1, 4, 2, 1, 1, 4, 2, 1], - [2, 5, 1, 2, 2, 5, 1, 2], + [ + 0, + 3, + 1, + 2, + ], + [1, 4, 2, 1], + [2, 5, 1, 2], ], dtype="int64", ), index=exp_idx, - columns=exp_col, + columns=exp_col[:4], + ) + expected2 = DataFrame( + np.array( + [ + [0.0, 3.0, 1.0, 2.0], + [1.0, 4.0, 2.0, 1.0], + [2.0, 5.0, 1.0, 2.0], + ], + ), + index=exp_idx, + columns=exp_col[4:], ) + expected = concat([expected1, expected2], axis=1) result = pivot_table( df, @@ -1628,7 +1649,7 @@ def test_pivot_dtaccessor(self): exp_idx = Index(["a", "b"], name="label") expected = DataFrame( - {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]}, index=exp_idx, columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) @@ -1639,7 +1660,7 @@ def test_pivot_dtaccessor(self): ) expected = DataFrame( - {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]}, index=Index([1, 2], dtype=np.int32, name="dt2"), columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) @@ -1660,7 +1681,7 @@ def test_pivot_dtaccessor(self): names=["dt1", "dt2"], ) expected = DataFrame( - np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), + np.array([[0.0, 3.0, 1.0, 4.0, 2.0, 5.0]]), index=Index([2013], dtype=np.int32), columns=exp_col, ) @@ -1764,13 +1785,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) - def test_categorical_margins(self, observed, request): - if observed: - request.node.add_marker( - pytest.mark.xfail( - reason="GH#17035 (np.mean of ints is casted back to ints)" - ) - ) + def test_categorical_margins(self, observed): # GH 10989 df = DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} @@ -1783,13 +1798,7 @@ def test_categorical_margins(self, observed, request): table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) - def test_categorical_margins_category(self, observed, request): - if observed: - request.node.add_marker( - pytest.mark.xfail( - reason="GH#17035 (np.mean of ints is casted back to ints)" - ) - ) + def test_categorical_margins_category(self, observed): df = DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) @@ -1816,7 +1825,7 @@ def test_margins_casted_to_float(self): result = pivot_table(df, index="D", margins=True) expected = DataFrame( - {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, + {"A": [3.0, 7.0, 5], "B": [2.5, 6.5, 4.5], "C": [2.0, 5.0, 3.5]}, index=Index(["X", "Y", "All"], name="D"), ) tm.assert_frame_equal(result, expected) @@ -2249,7 +2258,7 @@ def test_pivot_table_sort_false_with_multiple_values(self): index=["lastname", "firstname"], values=["height", "age"], sort=False ) expected = DataFrame( - [[173, 47], [182, 33]], + [[173.0, 47.0], [182.0, 33.0]], columns=["height", "age"], index=MultiIndex.from_tuples( [("Foo", "John"), ("Bar", "Michael")],