diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..0d3912847dca0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -543,6 +543,8 @@ Groupby/resample/rolling - Bug in :meth:`df.groupby(..).quantile() ` and :meth:`df.resample(..).quantile() ` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`) - Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for :class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) +- Bug in :meth:`DataFrameGroupBy.head`, :meth:`DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) + Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 32023576b0a91..e3fceb9bf0a06 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2742,7 +2742,10 @@ def head(self, n=5): """ self._reset_group_selection() mask = self._cumcount_array() < n - return self._selected_obj[mask] + if self.axis == 0: + return self._selected_obj[mask] + else: + return self._selected_obj.iloc[:, mask] @Substitution(name="groupby") @Substitution(see_also=_common_see_also) @@ -2776,7 +2779,10 @@ def tail(self, n=5): """ self._reset_group_selection() mask = self._cumcount_array(ascending=False) < n - return self._selected_obj[mask] + if self.axis == 0: + return self._selected_obj[mask] + else: + return self._selected_obj.iloc[:, mask] def _reindex_output( self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 10394ea997775..699cd88b5c53c 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -513,6 +513,30 @@ def test_groupby_head_tail(op, n, expected_rows, columns, as_index): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "op, n, expected_cols", + [ + ("head", -1, []), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, []), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +def test_groupby_head_tail_axis_1(op, n, expected_cols): + # GH 9772 + df = DataFrame( + [[1, 2, 3], [1, 4, 5], [2, 6, 7], [3, 8, 9]], columns=["A", "B", "C"] + ) + g = df.groupby([0, 0, 1], axis=1) + expected = df.iloc[:, expected_cols] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) + + def test_group_selection_cache(): # GH 12839 nth, head, and tail should return same result consistently df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"])