diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6bb9753fcea65..b63811e08e182 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -157,7 +157,7 @@ jobs:
         pytest pandas/tests/reductions/ --array-manager
         pytest pandas/tests/generic/test_generic.py --array-manager
         pytest pandas/tests/arithmetic/ --array-manager
-        pytest pandas/tests/groupby/aggregate/ --array-manager
+        pytest pandas/tests/groupby/ --array-manager
         pytest pandas/tests/reshape/merge --array-manager
 
         # indexing subset (temporary since other tests don't pass yet)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f4c69ea9d89db..aaf67fb1be532 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1815,6 +1815,8 @@ def count(self) -> DataFrame:
         ids, _, ngroups = self.grouper.group_info
         mask = ids != -1
 
+        using_array_manager = isinstance(data, ArrayManager)
+
         def hfunc(bvalues: ArrayLike) -> ArrayLike:
             # TODO(2DEA): reshape would not be necessary with 2D EAs
             if bvalues.ndim == 1:
@@ -1824,6 +1826,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
                 masked = mask & ~isna(bvalues)
 
             counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
+            if using_array_manager:
+                # count_level_2d returns a (1, N) array for a single column
+                # -> extract the 1D array
+                counted = counted[0, :]
             return counted
 
         new_mgr = data.grouped_reduce(hfunc)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 5004d1fe08a5b..d5b9a9806d8d5 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -84,6 +84,7 @@
     MultiIndex,
     ensure_index,
 )
+from pandas.core.internals import ArrayManager
 from pandas.core.series import Series
 from pandas.core.sorting import (
     compress_group_index,
@@ -214,6 +215,10 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
             #  TODO: can we have a workaround for EAs backed by ndarray?
             pass
 
+        elif isinstance(sdata._mgr, ArrayManager):
+            # TODO(ArrayManager) don't use fast_apply / libreduction.apply_frame_axis0
+            # for now -> relies on BlockManager internals
+            pass
         elif (
             com.get_callable_name(f) not in base.plotting_methods
             and isinstance(splitter, FrameSplitter)
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index e0447378c4542..44d929c707c87 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -270,15 +270,30 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
         -------
         ArrayManager
         """
-        # TODO ignore_failures
-        result_arrays = [func(arr) for arr in self.arrays]
+        result_arrays: List[np.ndarray] = []
+        result_indices: List[int] = []
+
+        for i, arr in enumerate(self.arrays):
+            try:
+                res = func(arr)
+            except (TypeError, NotImplementedError):
+                if not ignore_failures:
+                    raise
+                continue
+            result_arrays.append(res)
+            result_indices.append(i)
 
         if len(result_arrays) == 0:
             index = Index([None])  # placeholder
         else:
             index = Index(range(result_arrays[0].shape[0]))
 
-        return type(self)(result_arrays, [index, self.items])
+        if ignore_failures:
+            columns = self.items[np.array(result_indices, dtype="int64")]
+        else:
+            columns = self.items
+
+        return type(self)(result_arrays, [index, columns])
 
     def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
         """
diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py
index de8335738791d..cc036bb484ff9 100644
--- a/pandas/tests/groupby/test_allowlist.py
+++ b/pandas/tests/groupby/test_allowlist.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     Index,
@@ -355,7 +357,8 @@ def test_groupby_function_rename(mframe):
         "cummax",
         "cummin",
         "cumprod",
-        "describe",
+        # TODO(ArrayManager) quantile
+        pytest.param("describe", marks=td.skip_array_manager_not_yet_implemented),
         "rank",
         "quantile",
         "diff",
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 639fe308529dc..daf5c71af7488 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -7,6 +7,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -84,6 +86,7 @@ def test_apply_trivial_fail():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) fast_apply not used
 def test_fast_apply():
     # make sure that fast apply is correctly called
     # rather than raising any kind of error
@@ -213,6 +216,7 @@ def test_group_apply_once_per_group2(capsys):
     assert result == expected
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) fast_apply not used
 @pytest.mark.xfail(reason="GH-34998")
 def test_apply_fast_slow_identical():
     # GH 31613
@@ -233,6 +237,7 @@ def fast(group):
     tm.assert_frame_equal(fast_df, slow_df)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) fast_apply not used
 @pytest.mark.parametrize(
     "func",
     [
@@ -313,6 +318,7 @@ def test_groupby_as_index_apply(df):
     tm.assert_index_equal(res, ind)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_apply_concat_preserve_names(three_group):
     grouped = three_group.groupby(["A", "B"])
 
@@ -1003,9 +1009,10 @@ def test_apply_function_with_indexing_return_column():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail(reason="GH-34998")
-def test_apply_with_timezones_aware():
+def test_apply_with_timezones_aware(using_array_manager, request):
     # GH: 27212
+    if not using_array_manager:
+        request.node.add_marker(pytest.mark.xfail(reason="GH-34998"))
 
     dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2
     index_no_tz = pd.DatetimeIndex(dates)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index f0356ad90a3ff..a7247c2c04761 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Categorical,
@@ -81,6 +83,7 @@ def get_stats(group):
     assert result.index.names[0] == "C"
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_basic():
 
     cats = Categorical(
@@ -276,7 +279,9 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-def test_observed(observed):
+# TODO(ArrayManager) incorrect dtype for mean()
+@td.skip_array_manager_not_yet_implemented
+def test_observed(observed, using_array_manager):
     # multiple groupers, don't re-expand the output space
     # of the grouper
     # gh-14942 (implement)
@@ -535,6 +540,7 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
         assert False, msg
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range("2014-01-01", periods=4)
@@ -600,6 +606,7 @@ def test_categorical_index():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_describe_categorical_columns():
     # GH 11558
     cats = CategoricalIndex(
@@ -614,6 +621,7 @@ def test_describe_categorical_columns():
     tm.assert_categorical_equal(result.stack().columns.values, cats.values)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_unstack_categorical():
     # GH11558 (example is taken from the original issue)
     df = DataFrame(
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index cab5417e81445..598465a951e0f 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -367,6 +367,7 @@ def test_mad(self, gb, gni):
         result = gni.mad()
         tm.assert_frame_equal(result, expected)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
     def test_describe(self, df, gb, gni):
         # describe
         expected_index = Index([1, 3], name="A")
@@ -923,11 +924,13 @@ def test_is_monotonic_decreasing(in_vals, out_vals):
 # --------------------------------
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_apply_describe_bug(mframe):
     grouped = mframe.groupby(level="first")
     grouped.describe()  # it works!
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_series_describe_multikey():
     ts = tm.makeTimeSeries()
     grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
@@ -937,6 +940,7 @@ def test_series_describe_multikey():
     tm.assert_series_equal(result["min"], grouped.min(), check_names=False)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_series_describe_single():
     ts = tm.makeTimeSeries()
     grouped = ts.groupby(lambda x: x.month)
@@ -951,6 +955,7 @@ def test_series_index_name(df):
     assert result.index.name == "A"
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_frame_describe_multikey(tsframe):
     grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
     result = grouped.describe()
@@ -973,6 +978,7 @@ def test_frame_describe_multikey(tsframe):
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_frame_describe_tupleindex():
 
     # GH 14848 - regression from 0.19.0 to 0.19.1
@@ -992,6 +998,7 @@ def test_frame_describe_tupleindex():
         df2.groupby("key").describe()
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_frame_describe_unstacked_format():
     # GH 4792
     prices = {
@@ -1018,6 +1025,7 @@ def test_frame_describe_unstacked_format():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 @pytest.mark.filterwarnings(
     "ignore:"
     "indexing past lexsort depth may impact performance:"
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index afde1daca74c1..8cbb9d2443cb2 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -7,6 +7,7 @@
 
 from pandas.compat import IS64
 from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import (
@@ -210,6 +211,7 @@ def f(grp):
     tm.assert_series_equal(result, e)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_pass_args_kwargs(ts, tsframe):
     def f(x, q=None, axis=0):
         return np.percentile(x, q, axis=axis)
@@ -364,6 +366,7 @@ def f3(x):
         df2.groupby("a").apply(f3)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 def test_attr_wrapper(ts):
     grouped = ts.groupby(lambda x: x.weekday())
 
diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py
index 9c9d1aa881890..2924348e98b56 100644
--- a/pandas/tests/groupby/test_quantile.py
+++ b/pandas/tests/groupby/test_quantile.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -8,6 +10,9 @@
 )
 import pandas._testing as tm
 
+# TODO(ArrayManager) quantile
+pytestmark = td.skip_array_manager_not_yet_implemented
+
 
 @pytest.mark.parametrize(
     "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 4956454ef2d4f..c4621d5fc0f8c 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -4,6 +4,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas.core.dtypes.common import (
     ensure_platform_int,
     is_timedelta64_dtype,
@@ -161,8 +163,13 @@ def test_transform_broadcast(tsframe, ts):
             assert_fp_equal(res.xs(idx), agged[idx])
 
 
-def test_transform_axis_1(request, transformation_func):
+def test_transform_axis_1(request, transformation_func, using_array_manager):
     # GH 36308
+    if using_array_manager and transformation_func == "pct_change":
+        # TODO(ArrayManager) column-wise shift
+        request.node.add_marker(
+            pytest.mark.xfail(reason="ArrayManager: shift axis=1 not yet implemented")
+        )
     warn = None
     if transformation_func == "tshift":
         warn = FutureWarning
@@ -183,6 +190,8 @@ def test_transform_axis_1(request, transformation_func):
     tm.assert_equal(result, expected)
 
 
+# TODO(ArrayManager) groupby().transform returns DataFrame backed by BlockManager
+@td.skip_array_manager_not_yet_implemented
 def test_transform_axis_ts(tsframe):
 
     # make sure that we are setting the axes