Skip to content

Commit

Permalink
Backport PR #33761 on branch 1.0.x (REGR: fix DataFrame reduction with EA columns and numeric_only=True) (#34000)
Browse files Browse the repository at this point in the history

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
simonjayhawkins and jorisvandenbossche authored May 5, 2020
1 parent 23288e4 commit 7b09793
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ including other versions of pandas.
Fixed regressions
~~~~~~~~~~~~~~~~~
- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
- Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`).
-

.. _whatsnew_104.bug_fixes:
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7891,9 +7891,15 @@ def _get_data(axis_matters):

out_dtype = "bool" if filter_type == "bool" else None

def blk_func(values):
    """
    Reduce the values of a single block.

    ExtensionArray values are reduced through their own ``_reduce``
    method, called with the reduction ``name``; any other values are
    handed to ``op`` and reduced along ``axis=1``.  ``skipna`` and
    ``kwds`` are forwarded unchanged in both cases.
    """
    # Non-extension values take the plain ``op`` path.
    if not isinstance(values, ExtensionArray):
        return op(values, axis=1, skipna=skipna, **kwds)
    # Extension arrays are reduced via their own ``_reduce`` method.
    return values._reduce(name, skipna=skipna, **kwds)

# After possibly _get_data and transposing, we are now in the
# simple case where we can use BlockManager._reduce
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
res = df._data.reduce(blk_func)
assert isinstance(res, dict)
if len(res):
assert len(res) == max(list(res.keys())) + 1, res.keys()
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,20 @@ def test_mean_datetimelike_numeric_only_false(self):
)
tm.assert_series_equal(result, expected)

# mean of period is not allowed
df["D"] = pd.period_range("2016", periods=3, freq="A")

with pytest.raises(TypeError, match="mean is not implemented for Period"):
df.mean(numeric_only=False)

def test_mean_extensionarray_numeric_only_true(self):
    """
    ``mean(numeric_only=True)`` on an all-``Int64`` frame must match the
    mean of a frame built from the same integer values without that dtype.
    """
    # https://github.com/pandas-dev/pandas/issues/33256
    values = np.random.randint(1000, size=(10, 5))

    # Reference result from the frame built without the "Int64" dtype.
    expected = pd.DataFrame(values).mean()

    # Same data stored as the "Int64" extension dtype.
    nullable_frame = pd.DataFrame(values, dtype="Int64")
    result = nullable_frame.mean(numeric_only=True)

    tm.assert_series_equal(result, expected)

def test_stats_mixed_type(self, float_string_frame):
# don't blow up
float_string_frame.std(1)
Expand Down

0 comments on commit 7b09793

Please sign in to comment.