diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 900329d07c00a..d204fcf829534 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -815,11 +815,18 @@ def __hash__(self):
         "pyspark.pandas.tests.groupby.test_cumulative",
         "pyspark.pandas.tests.groupby.test_describe",
         "pyspark.pandas.tests.groupby.test_groupby",
+        "pyspark.pandas.tests.groupby.test_grouping",
         "pyspark.pandas.tests.groupby.test_head_tail",
         "pyspark.pandas.tests.groupby.test_index",
+        "pyspark.pandas.tests.groupby.test_missing",
         "pyspark.pandas.tests.groupby.test_missing_data",
+        "pyspark.pandas.tests.groupby.test_nlargest_nsmallest",
+        "pyspark.pandas.tests.groupby.test_raises",
+        "pyspark.pandas.tests.groupby.test_rank",
+        "pyspark.pandas.tests.groupby.test_size",
         "pyspark.pandas.tests.groupby.test_split_apply",
         "pyspark.pandas.tests.groupby.test_stat",
+        "pyspark.pandas.tests.groupby.test_value_counts",
         "pyspark.pandas.tests.test_indexing",
         "pyspark.pandas.tests.test_ops_on_diff_frames",
         "pyspark.pandas.tests.test_ops_on_diff_frames_groupby",
@@ -1113,6 +1120,13 @@ def __hash__(self):
         "pyspark.pandas.tests.connect.groupby.test_parity_describe",
         "pyspark.pandas.tests.connect.groupby.test_parity_head_tail",
         "pyspark.pandas.tests.connect.groupby.test_parity_groupby",
+        "pyspark.pandas.tests.connect.groupby.test_parity_grouping",
+        "pyspark.pandas.tests.connect.groupby.test_parity_missing",
+        "pyspark.pandas.tests.connect.groupby.test_parity_nlargest_nsmallest",
+        "pyspark.pandas.tests.connect.groupby.test_parity_raises",
+        "pyspark.pandas.tests.connect.groupby.test_parity_rank",
+        "pyspark.pandas.tests.connect.groupby.test_parity_size",
+        "pyspark.pandas.tests.connect.groupby.test_parity_value_counts",
     ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_grouping.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_grouping.py
new file mode 100644
index 0000000000000..8b3f9927c0f2b
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_grouping.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_grouping import GroupingTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class GroupingParityTests(GroupingTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_grouping import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_missing.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_missing.py
new file mode 100644
index 0000000000000..f6776d9bac608
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_missing.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_missing import MissingTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class MissingParityTests(MissingTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_missing import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_nlargest_nsmallest.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_nlargest_nsmallest.py
new file mode 100644
index 0000000000000..71c388a1d2981
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_nlargest_nsmallest.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_nlargest_nsmallest import NlargestNsmallestTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class NlargestNsmallestParityTests(
+    NlargestNsmallestTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase
+):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_nlargest_nsmallest import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_raises.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_raises.py
new file mode 100644
index 0000000000000..db122a81ebdd1
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_raises.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_raises import RaisesTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class RaisesParityTests(RaisesTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_raises import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_rank.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_rank.py
new file mode 100644
index 0000000000000..2ad5cf07cfcaa
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_rank.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_rank import RankTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class RankParityTests(RankTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_rank import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_size.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_size.py
new file mode 100644
index 0000000000000..2904f0cded276
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_size.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_size import SizeTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class SizeParityTests(SizeTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_size import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/connect/groupby/test_parity_value_counts.py b/python/pyspark/pandas/tests/connect/groupby/test_parity_value_counts.py
new file mode 100644
index 0000000000000..a9c84822006df
--- /dev/null
+++ b/python/pyspark/pandas/tests/connect/groupby/test_parity_value_counts.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.pandas.tests.groupby.test_value_counts import ValueCountsTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class ValueCountsParityTests(ValueCountsTestsMixin, PandasOnSparkTestCase, ReusedConnectTestCase):
+    pass  # empty body: nothing is overridden; all members come from the base classes
+
+
+if __name__ == "__main__":  # executed only when this file is run as a script
+    from pyspark.pandas.tests.connect.groupby.test_parity_value_counts import *  # noqa: F401
+
+    try:
+        import xmlrunner  # optional dependency; its absence is handled below
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None  # no XML runner: unittest.main() falls back to its default
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_groupby.py b/python/pyspark/pandas/tests/groupby/test_groupby.py
index 4ef2d1acd811d..6ad74cdf81777 100644
--- a/python/pyspark/pandas/tests/groupby/test_groupby.py
+++ b/python/pyspark/pandas/tests/groupby/test_groupby.py
@@ -186,84 +186,6 @@ def sort(df):
                 sort(pdf.groupby(10, as_index=as_index)[[20, 30]].sum()),
             )
 
-    def test_nsmallest(self):
-        pdf = pd.DataFrame(
-            {
-                "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
-                "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-                "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-                "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-            },
-            index=np.random.rand(9 * 3),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(
-            psdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
-            pdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
-        )
-        self.assert_eq(
-            psdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
-            pdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
-        )
-        self.assert_eq(
-            (psdf.b * 10).groupby(psdf.a).nsmallest(2).sort_index(),
-            (pdf.b * 10).groupby(pdf.a).nsmallest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.rename().groupby(psdf.a).nsmallest(2).sort_index(),
-            pdf.b.rename().groupby(pdf.a).nsmallest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.groupby(psdf.a.rename()).nsmallest(2).sort_index(),
-            pdf.b.groupby(pdf.a.rename()).nsmallest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.rename().groupby(psdf.a.rename()).nsmallest(2).sort_index(),
-            pdf.b.rename().groupby(pdf.a.rename()).nsmallest(2).sort_index(),
-        )
-        with self.assertRaisesRegex(ValueError, "nsmallest do not support multi-index now"):
-            psdf.set_index(["a", "b"]).groupby(["c"])["d"].nsmallest(1)
-
-    def test_nlargest(self):
-        pdf = pd.DataFrame(
-            {
-                "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
-                "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-                "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-                "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
-            },
-            index=np.random.rand(9 * 3),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(
-            psdf.groupby(["a"])["b"].nlargest(1).sort_values(),
-            pdf.groupby(["a"])["b"].nlargest(1).sort_values(),
-        )
-        self.assert_eq(
-            psdf.groupby(["a"])["b"].nlargest(2).sort_index(),
-            pdf.groupby(["a"])["b"].nlargest(2).sort_index(),
-        )
-        self.assert_eq(
-            (psdf.b * 10).groupby(psdf.a).nlargest(2).sort_index(),
-            (pdf.b * 10).groupby(pdf.a).nlargest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.rename().groupby(psdf.a).nlargest(2).sort_index(),
-            pdf.b.rename().groupby(pdf.a).nlargest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.groupby(psdf.a.rename()).nlargest(2).sort_index(),
-            pdf.b.groupby(pdf.a.rename()).nlargest(2).sort_index(),
-        )
-        self.assert_eq(
-            psdf.b.rename().groupby(psdf.a.rename()).nlargest(2).sort_index(),
-            pdf.b.rename().groupby(pdf.a.rename()).nlargest(2).sort_index(),
-        )
-        with self.assertRaisesRegex(ValueError, "nlargest do not support multi-index now"):
-            psdf.set_index(["a", "b"]).groupby(["c"])["d"].nlargest(1)
-
     def test_shift(self):
         pdf = pd.DataFrame(
             {
@@ -331,239 +253,11 @@ def test_shift(self):
         #                pdf.groupby([('x', 'a'), ('x', 'b')]).shift(periods=-1,
         #                                                            fill_value=0).sort_index())
 
-    def test_missing(self):
-        psdf = ps.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
-
-        # DataFrameGroupBy functions
-        missing_functions = inspect.getmembers(
-            MissingPandasLikeDataFrameGroupBy, inspect.isfunction
-        )
-        unsupported_functions = [
-            name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
-        ]
-        for name in unsupported_functions:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError,
-                "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
-            ):
-                getattr(psdf.groupby("a"), name)()
-
-        deprecated_functions = [
-            name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
-        ]
-        for name in deprecated_functions:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
-            ):
-                getattr(psdf.groupby("a"), name)()
-
-        # SeriesGroupBy functions
-        missing_functions = inspect.getmembers(MissingPandasLikeSeriesGroupBy, inspect.isfunction)
-        unsupported_functions = [
-            name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
-        ]
-        for name in unsupported_functions:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError,
-                "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
-            ):
-                getattr(psdf.a.groupby(psdf.a), name)()
-
-        deprecated_functions = [
-            name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
-        ]
-        for name in deprecated_functions:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
-            ):
-                getattr(psdf.a.groupby(psdf.a), name)()
-
-        # DataFrameGroupBy properties
-        missing_properties = inspect.getmembers(
-            MissingPandasLikeDataFrameGroupBy, lambda o: isinstance(o, property)
-        )
-        unsupported_properties = [
-            name
-            for (name, type_) in missing_properties
-            if type_.fget.__name__ == "unsupported_property"
-        ]
-        for name in unsupported_properties:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError,
-                "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
-            ):
-                getattr(psdf.groupby("a"), name)
-        deprecated_properties = [
-            name
-            for (name, type_) in missing_properties
-            if type_.fget.__name__ == "deprecated_property"
-        ]
-        for name in deprecated_properties:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
-            ):
-                getattr(psdf.groupby("a"), name)
-
-        # SeriesGroupBy properties
-        missing_properties = inspect.getmembers(
-            MissingPandasLikeSeriesGroupBy, lambda o: isinstance(o, property)
-        )
-        unsupported_properties = [
-            name
-            for (name, type_) in missing_properties
-            if type_.fget.__name__ == "unsupported_property"
-        ]
-        for name in unsupported_properties:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError,
-                "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
-            ):
-                getattr(psdf.a.groupby(psdf.a), name)
-        deprecated_properties = [
-            name
-            for (name, type_) in missing_properties
-            if type_.fget.__name__ == "deprecated_property"
-        ]
-        for name in deprecated_properties:
-            with self.assertRaisesRegex(
-                PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
-            ):
-                getattr(psdf.a.groupby(psdf.a), name)
-
     @staticmethod
     def test_is_multi_agg_with_relabel():
         assert is_multi_agg_with_relabel(a="max") is False
         assert is_multi_agg_with_relabel(a_min=("a", "max"), a_max=("a", "min")) is True
 
-    def test_get_group(self):
-        pdf = pd.DataFrame(
-            [
-                ("falcon", "bird", 389.0),
-                ("parrot", "bird", 24.0),
-                ("lion", "mammal", 80.5),
-                ("monkey", "mammal", np.nan),
-            ],
-            columns=["name", "class", "max_speed"],
-            index=[0, 2, 3, 1],
-        )
-        pdf.columns.name = "Koalas"
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(
-            psdf.groupby("class").get_group("bird"),
-            pdf.groupby("class").get_group("bird"),
-        )
-        self.assert_eq(
-            psdf.groupby("class")["name"].get_group("mammal"),
-            pdf.groupby("class")["name"].get_group("mammal"),
-        )
-        self.assert_eq(
-            psdf.groupby("class")[["name"]].get_group("mammal"),
-            pdf.groupby("class")[["name"]].get_group("mammal"),
-        )
-        self.assert_eq(
-            psdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
-            pdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
-        )
-        self.assert_eq(
-            psdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
-            pdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
-        )
-        self.assert_eq(
-            psdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
-            pdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
-        )
-        self.assert_eq(
-            (psdf.max_speed + 1).groupby(psdf["class"]).get_group("mammal"),
-            (pdf.max_speed + 1).groupby(pdf["class"]).get_group("mammal"),
-        )
-        self.assert_eq(
-            psdf.groupby("max_speed").get_group(80.5),
-            pdf.groupby("max_speed").get_group(80.5),
-        )
-
-        self.assertRaises(KeyError, lambda: psdf.groupby("class").get_group("fish"))
-        self.assertRaises(TypeError, lambda: psdf.groupby("class").get_group(["bird", "mammal"]))
-        self.assertRaises(KeyError, lambda: psdf.groupby("class")["name"].get_group("fish"))
-        self.assertRaises(
-            TypeError, lambda: psdf.groupby("class")["name"].get_group(["bird", "mammal"])
-        )
-        self.assertRaises(
-            KeyError, lambda: psdf.groupby(["class", "name"]).get_group(("lion", "mammal"))
-        )
-        self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("lion",)))
-        self.assertRaises(
-            ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("mammal",))
-        )
-        self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group("mammal"))
-
-        # MultiIndex columns
-        pdf.columns = pd.MultiIndex.from_tuples([("A", "name"), ("B", "class"), ("C", "max_speed")])
-        pdf.columns.names = ["Hello", "Koalas"]
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(
-            psdf.groupby(("B", "class")).get_group("bird"),
-            pdf.groupby(("B", "class")).get_group("bird"),
-        )
-        self.assert_eq(
-            psdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
-            pdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
-        )
-        self.assert_eq(
-            psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
-            pdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
-        )
-        self.assert_eq(
-            psdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
-                ("mammal", "lion")
-            ),
-            pdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
-                ("mammal", "lion")
-            ),
-        )
-        self.assert_eq(
-            (psdf[("C", "max_speed")] + 1).groupby(psdf[("B", "class")]).get_group("mammal"),
-            (pdf[("C", "max_speed")] + 1).groupby(pdf[("B", "class")]).get_group("mammal"),
-        )
-        self.assert_eq(
-            psdf.groupby(("C", "max_speed")).get_group(80.5),
-            pdf.groupby(("C", "max_speed")).get_group(80.5),
-        )
-
-        self.assertRaises(KeyError, lambda: psdf.groupby(("B", "class")).get_group("fish"))
-        self.assertRaises(
-            TypeError, lambda: psdf.groupby(("B", "class")).get_group(["bird", "mammal"])
-        )
-        self.assertRaises(
-            KeyError, lambda: psdf.groupby(("B", "class"))[("A", "name")].get_group("fish")
-        )
-        self.assertRaises(
-            KeyError,
-            lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion", "mammal")),
-        )
-        self.assertRaises(
-            ValueError,
-            lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion",)),
-        )
-        self.assertRaises(
-            ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal",))
-        )
-        self.assertRaises(
-            ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group("mammal")
-        )
-
-    def test_getitem(self):
-        psdf = ps.DataFrame(
-            {
-                "a": [1, 1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
-                "b": [2, 3, 1, 4, 6, 9, 8, 10, 7, 5] * 3,
-                "c": [3, 5, 2, 5, 1, 2, 6, 4, 3, 6] * 3,
-            },
-            index=np.random.rand(10 * 3),
-        )
-
-        self.assertTrue(isinstance(psdf.groupby("a")["b"], SeriesGroupBy))
-
     def test_all_any(self):
         pdf = pd.DataFrame(
             {
@@ -647,19 +341,6 @@ def sort(df):
             pdf.groupby("A").all(skipna=True).sort_index(),
         )
 
-    def test_raises(self):
-        psdf = ps.DataFrame(
-            {"a": [1, 2, 6, 4, 4, 6, 4, 3, 7], "b": [4, 2, 7, 3, 3, 1, 1, 1, 2]},
-            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
-        )
-        # test raises with incorrect key
-        self.assertRaises(ValueError, lambda: psdf.groupby([]))
-        self.assertRaises(KeyError, lambda: psdf.groupby("x"))
-        self.assertRaises(KeyError, lambda: psdf.groupby(["a", "x"]))
-        self.assertRaises(KeyError, lambda: psdf.groupby("a")["x"])
-        self.assertRaises(KeyError, lambda: psdf.groupby("a")["b", "x"])
-        self.assertRaises(KeyError, lambda: psdf.groupby("a")[["b", "x"]])
-
     def test_nunique(self):
         pdf = pd.DataFrame(
             {"a": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], "b": [2, 2, 2, 3, 3, 4, 4, 5, 5, 5]}
@@ -727,99 +408,6 @@ def test_unique(self):
                 for act, exp in zip(actual, expect):
                     self.assertTrue(sorted(act) == sorted(exp))
 
-    def test_value_counts(self):
-        pdf = pd.DataFrame(
-            {"A": [np.nan, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, np.nan]}, columns=["A", "B"]
-        )
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(
-            psdf.groupby("A")["B"].value_counts().sort_index(),
-            pdf.groupby("A")["B"].value_counts().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby("A")["B"].value_counts(dropna=False).sort_index(),
-            pdf.groupby("A")["B"].value_counts(dropna=False).sort_index(),
-            almost=True,
-        )
-        self.assert_eq(
-            psdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(),
-            pdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(),
-            # Returns are the same considering values and types,
-            # disable check_exact to pass the assert_eq
-            check_exact=False,
-        )
-        self.assert_eq(
-            psdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(),
-            pdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby("A")["B"]
-            .value_counts(sort=True, ascending=False, dropna=False)
-            .sort_index(),
-            pdf.groupby("A")["B"]
-            .value_counts(sort=True, ascending=False, dropna=False)
-            .sort_index(),
-            almost=True,
-        )
-        self.assert_eq(
-            psdf.groupby("A")["B"]
-            .value_counts(sort=True, ascending=True, dropna=False)
-            .sort_index(),
-            pdf.groupby("A")["B"]
-            .value_counts(sort=True, ascending=True, dropna=False)
-            .sort_index(),
-            almost=True,
-        )
-        self.assert_eq(
-            psdf.B.rename().groupby(psdf.A).value_counts().sort_index(),
-            pdf.B.rename().groupby(pdf.A).value_counts().sort_index(),
-        )
-        self.assert_eq(
-            psdf.B.rename().groupby(psdf.A, dropna=False).value_counts().sort_index(),
-            pdf.B.rename().groupby(pdf.A, dropna=False).value_counts().sort_index(),
-            # Returns are the same considering values and types,
-            # disable check_exact to pass the assert_eq
-            check_exact=False,
-        )
-        self.assert_eq(
-            psdf.B.groupby(psdf.A.rename()).value_counts().sort_index(),
-            pdf.B.groupby(pdf.A.rename()).value_counts().sort_index(),
-        )
-        self.assert_eq(
-            psdf.B.rename().groupby(psdf.A.rename()).value_counts().sort_index(),
-            pdf.B.rename().groupby(pdf.A.rename()).value_counts().sort_index(),
-        )
-
-    def test_size(self):
-        pdf = pd.DataFrame({"A": [1, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, 3]})
-        psdf = ps.from_pandas(pdf)
-        self.assert_eq(psdf.groupby("A").size().sort_index(), pdf.groupby("A").size().sort_index())
-        self.assert_eq(
-            psdf.groupby("A")["B"].size().sort_index(), pdf.groupby("A")["B"].size().sort_index()
-        )
-        self.assert_eq(
-            psdf.groupby("A")[["B"]].size().sort_index(),
-            pdf.groupby("A")[["B"]].size().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby(["A", "B"]).size().sort_index(),
-            pdf.groupby(["A", "B"]).size().sort_index(),
-        )
-
-        # multi-index columns
-        columns = pd.MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
-        pdf.columns = columns
-        psdf.columns = columns
-
-        self.assert_eq(
-            psdf.groupby(("X", "A")).size().sort_index(),
-            pdf.groupby(("X", "A")).size().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
-            pdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
-        )
-
     def test_diff(self):
         pdf = pd.DataFrame(
             {
@@ -869,56 +457,6 @@ def test_diff(self):
             pdf.groupby([("x", "a"), ("x", "b")]).diff().sort_index(),
         )
 
-    def test_rank(self):
-        pdf = pd.DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6] * 3,
-                "b": [1, 1, 2, 3, 5, 8] * 3,
-                "c": [1, 4, 9, 16, 25, 36] * 3,
-            },
-            index=np.random.rand(6 * 3),
-        )
-        psdf = ps.from_pandas(pdf)
-
-        self.assert_eq(psdf.groupby("b").rank().sort_index(), pdf.groupby("b").rank().sort_index())
-        self.assert_eq(
-            psdf.groupby(["a", "b"]).rank().sort_index(),
-            pdf.groupby(["a", "b"]).rank().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby(["b"])["a"].rank().sort_index(),
-            pdf.groupby(["b"])["a"].rank().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
-            pdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby(psdf.b // 5).rank().sort_index(),
-            pdf.groupby(pdf.b // 5).rank().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby(psdf.b // 5)["a"].rank().sort_index(),
-            pdf.groupby(pdf.b // 5)["a"].rank().sort_index(),
-        )
-
-        self.assert_eq(psdf.groupby("b").rank().sum(), pdf.groupby("b").rank().sum())
-        self.assert_eq(psdf.groupby(["b"])["a"].rank().sum(), pdf.groupby(["b"])["a"].rank().sum())
-
-        # multi-index columns
-        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
-        pdf.columns = columns
-        psdf.columns = columns
-
-        self.assert_eq(
-            psdf.groupby(("x", "b")).rank().sort_index(),
-            pdf.groupby(("x", "b")).rank().sort_index(),
-        )
-        self.assert_eq(
-            psdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
-            pdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
-        )
-
 
 class GroupByTests(GroupByTestsMixin, PandasOnSparkTestCase, TestUtils):
     pass
diff --git a/python/pyspark/pandas/tests/groupby/test_grouping.py b/python/pyspark/pandas/tests/groupby/test_grouping.py
new file mode 100644
index 0000000000000..13aa86ad9b4e2
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_grouping.py
@@ -0,0 +1,171 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import numpy as np
+import pyspark.pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.groupby import SeriesGroupBy
+
+
+class GroupingTestsMixin:
+    def test_get_group(self):
+        pdf = pd.DataFrame(
+            [
+                ("falcon", "bird", 389.0),
+                ("parrot", "bird", 24.0),
+                ("lion", "mammal", 80.5),
+                ("monkey", "mammal", np.nan),
+            ],
+            columns=["name", "class", "max_speed"],
+            index=[0, 2, 3, 1],
+        )
+        pdf.columns.name = "Koalas"
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(
+            psdf.groupby("class").get_group("bird"),
+            pdf.groupby("class").get_group("bird"),
+        )
+        self.assert_eq(
+            psdf.groupby("class")["name"].get_group("mammal"),
+            pdf.groupby("class")["name"].get_group("mammal"),
+        )
+        self.assert_eq(
+            psdf.groupby("class")[["name"]].get_group("mammal"),
+            pdf.groupby("class")[["name"]].get_group("mammal"),
+        )
+        self.assert_eq(
+            psdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
+            pdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
+        )
+        self.assert_eq(
+            psdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
+            pdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
+        )
+        self.assert_eq(
+            psdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
+            pdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
+        )
+        self.assert_eq(
+            (psdf.max_speed + 1).groupby(psdf["class"]).get_group("mammal"),
+            (pdf.max_speed + 1).groupby(pdf["class"]).get_group("mammal"),
+        )
+        self.assert_eq(
+            psdf.groupby("max_speed").get_group(80.5),
+            pdf.groupby("max_speed").get_group(80.5),
+        )
+
+        self.assertRaises(KeyError, lambda: psdf.groupby("class").get_group("fish"))
+        self.assertRaises(TypeError, lambda: psdf.groupby("class").get_group(["bird", "mammal"]))
+        self.assertRaises(KeyError, lambda: psdf.groupby("class")["name"].get_group("fish"))
+        self.assertRaises(
+            TypeError, lambda: psdf.groupby("class")["name"].get_group(["bird", "mammal"])
+        )
+        self.assertRaises(
+            KeyError, lambda: psdf.groupby(["class", "name"]).get_group(("lion", "mammal"))
+        )
+        self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("lion",)))
+        self.assertRaises(
+            ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("mammal",))
+        )
+        self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group("mammal"))
+
+        # MultiIndex columns
+        pdf.columns = pd.MultiIndex.from_tuples([("A", "name"), ("B", "class"), ("C", "max_speed")])
+        pdf.columns.names = ["Hello", "Koalas"]
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(
+            psdf.groupby(("B", "class")).get_group("bird"),
+            pdf.groupby(("B", "class")).get_group("bird"),
+        )
+        self.assert_eq(
+            psdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
+            pdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
+        )
+        self.assert_eq(
+            psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
+            pdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
+        )
+        self.assert_eq(
+            psdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
+                ("mammal", "lion")
+            ),
+            pdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
+                ("mammal", "lion")
+            ),
+        )
+        self.assert_eq(
+            (psdf[("C", "max_speed")] + 1).groupby(psdf[("B", "class")]).get_group("mammal"),
+            (pdf[("C", "max_speed")] + 1).groupby(pdf[("B", "class")]).get_group("mammal"),
+        )
+        self.assert_eq(
+            psdf.groupby(("C", "max_speed")).get_group(80.5),
+            pdf.groupby(("C", "max_speed")).get_group(80.5),
+        )
+
+        self.assertRaises(KeyError, lambda: psdf.groupby(("B", "class")).get_group("fish"))
+        self.assertRaises(
+            TypeError, lambda: psdf.groupby(("B", "class")).get_group(["bird", "mammal"])
+        )
+        self.assertRaises(
+            KeyError, lambda: psdf.groupby(("B", "class"))[("A", "name")].get_group("fish")
+        )
+        self.assertRaises(
+            KeyError,
+            lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion", "mammal")),
+        )
+        self.assertRaises(
+            ValueError,
+            lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion",)),
+        )
+        self.assertRaises(
+            ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal",))
+        )
+        self.assertRaises(
+            ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group("mammal")
+        )
+
+    def test_getitem(self):
+        psdf = ps.DataFrame(
+            {
+                "a": [1, 1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
+                "b": [2, 3, 1, 4, 6, 9, 8, 10, 7, 5] * 3,
+                "c": [3, 5, 2, 5, 1, 2, 6, 4, 3, 6] * 3,
+            },
+            index=np.random.rand(10 * 3),
+        )
+
+        self.assertTrue(isinstance(psdf.groupby("a")["b"], SeriesGroupBy))
+
+
+class GroupingTests(GroupingTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_grouping import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_missing.py b/python/pyspark/pandas/tests/groupby/test_missing.py
new file mode 100644
index 0000000000000..d3c70aa1a9852
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_missing.py
@@ -0,0 +1,144 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+import inspect
+
+import pyspark.pandas as ps
+from pyspark.pandas.exceptions import PandasNotImplementedError
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.missing.groupby import (
+    MissingPandasLikeDataFrameGroupBy,
+    MissingPandasLikeSeriesGroupBy,
+)
+
+
+class MissingTestsMixin:
+    def test_missing(self):
+        psdf = ps.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
+
+        # DataFrameGroupBy functions
+        missing_functions = inspect.getmembers(
+            MissingPandasLikeDataFrameGroupBy, inspect.isfunction
+        )
+        unsupported_functions = [
+            name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
+        ]
+        for name in unsupported_functions:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError,
+                "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
+            ):
+                getattr(psdf.groupby("a"), name)()
+
+        deprecated_functions = [
+            name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
+        ]
+        for name in deprecated_functions:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
+            ):
+                getattr(psdf.groupby("a"), name)()
+
+        # SeriesGroupBy functions
+        missing_functions = inspect.getmembers(MissingPandasLikeSeriesGroupBy, inspect.isfunction)
+        unsupported_functions = [
+            name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
+        ]
+        for name in unsupported_functions:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError,
+                "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
+            ):
+                getattr(psdf.a.groupby(psdf.a), name)()
+
+        deprecated_functions = [
+            name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
+        ]
+        for name in deprecated_functions:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
+            ):
+                getattr(psdf.a.groupby(psdf.a), name)()
+
+        # DataFrameGroupBy properties
+        missing_properties = inspect.getmembers(
+            MissingPandasLikeDataFrameGroupBy, lambda o: isinstance(o, property)
+        )
+        unsupported_properties = [
+            name
+            for (name, type_) in missing_properties
+            if type_.fget.__name__ == "unsupported_property"
+        ]
+        for name in unsupported_properties:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError,
+                "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
+            ):
+                getattr(psdf.groupby("a"), name)
+        deprecated_properties = [
+            name
+            for (name, type_) in missing_properties
+            if type_.fget.__name__ == "deprecated_property"
+        ]
+        for name in deprecated_properties:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
+            ):
+                getattr(psdf.groupby("a"), name)
+
+        # SeriesGroupBy properties
+        missing_properties = inspect.getmembers(
+            MissingPandasLikeSeriesGroupBy, lambda o: isinstance(o, property)
+        )
+        unsupported_properties = [
+            name
+            for (name, type_) in missing_properties
+            if type_.fget.__name__ == "unsupported_property"
+        ]
+        for name in unsupported_properties:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError,
+                "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
+            ):
+                getattr(psdf.a.groupby(psdf.a), name)
+        deprecated_properties = [
+            name
+            for (name, type_) in missing_properties
+            if type_.fget.__name__ == "deprecated_property"
+        ]
+        for name in deprecated_properties:
+            with self.assertRaisesRegex(
+                PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
+            ):
+                getattr(psdf.a.groupby(psdf.a), name)
+
+
+class MissingTests(MissingTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_missing import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_nlargest_nsmallest.py b/python/pyspark/pandas/tests/groupby/test_nlargest_nsmallest.py
new file mode 100644
index 0000000000000..e74f880bce8d9
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_nlargest_nsmallest.py
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import numpy as np
+import pyspark.pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class NlargestNsmallestTestsMixin:
+    def test_nlargest(self):
+        pdf = pd.DataFrame(
+            {
+                "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
+                "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+                "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+                "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+            },
+            index=np.random.rand(9 * 3),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(
+            psdf.groupby(["a"])["b"].nlargest(1).sort_values(),
+            pdf.groupby(["a"])["b"].nlargest(1).sort_values(),
+        )
+        self.assert_eq(
+            psdf.groupby(["a"])["b"].nlargest(2).sort_index(),
+            pdf.groupby(["a"])["b"].nlargest(2).sort_index(),
+        )
+        self.assert_eq(
+            (psdf.b * 10).groupby(psdf.a).nlargest(2).sort_index(),
+            (pdf.b * 10).groupby(pdf.a).nlargest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.rename().groupby(psdf.a).nlargest(2).sort_index(),
+            pdf.b.rename().groupby(pdf.a).nlargest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.groupby(psdf.a.rename()).nlargest(2).sort_index(),
+            pdf.b.groupby(pdf.a.rename()).nlargest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.rename().groupby(psdf.a.rename()).nlargest(2).sort_index(),
+            pdf.b.rename().groupby(pdf.a.rename()).nlargest(2).sort_index(),
+        )
+        with self.assertRaisesRegex(ValueError, "nlargest do not support multi-index now"):
+            psdf.set_index(["a", "b"]).groupby(["c"])["d"].nlargest(1)
+
+    def test_nsmallest(self):
+        pdf = pd.DataFrame(
+            {
+                "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
+                "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+                "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+                "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
+            },
+            index=np.random.rand(9 * 3),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(
+            psdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
+            pdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
+        )
+        self.assert_eq(
+            psdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
+            pdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
+        )
+        self.assert_eq(
+            (psdf.b * 10).groupby(psdf.a).nsmallest(2).sort_index(),
+            (pdf.b * 10).groupby(pdf.a).nsmallest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.rename().groupby(psdf.a).nsmallest(2).sort_index(),
+            pdf.b.rename().groupby(pdf.a).nsmallest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.groupby(psdf.a.rename()).nsmallest(2).sort_index(),
+            pdf.b.groupby(pdf.a.rename()).nsmallest(2).sort_index(),
+        )
+        self.assert_eq(
+            psdf.b.rename().groupby(psdf.a.rename()).nsmallest(2).sort_index(),
+            pdf.b.rename().groupby(pdf.a.rename()).nsmallest(2).sort_index(),
+        )
+        with self.assertRaisesRegex(ValueError, "nsmallest do not support multi-index now"):
+            psdf.set_index(["a", "b"]).groupby(["c"])["d"].nsmallest(1)
+
+
+class NlargestNsmallestTests(NlargestNsmallestTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_nlargest_nsmallest import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_raises.py b/python/pyspark/pandas/tests/groupby/test_raises.py
new file mode 100644
index 0000000000000..4fc134f5e06b1
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_raises.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark import pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class RaisesTestsMixin:
+    def test_raises(self):
+        psdf = ps.DataFrame(
+            {"a": [1, 2, 6, 4, 4, 6, 4, 3, 7], "b": [4, 2, 7, 3, 3, 1, 1, 1, 2]},
+            index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
+        )
+        # Invalid keys must raise: an empty key list, or an unknown grouping/selected column
+        self.assertRaises(ValueError, lambda: psdf.groupby([]))
+        self.assertRaises(KeyError, lambda: psdf.groupby("x"))
+        self.assertRaises(KeyError, lambda: psdf.groupby(["a", "x"]))
+        self.assertRaises(KeyError, lambda: psdf.groupby("a")["x"])
+        self.assertRaises(KeyError, lambda: psdf.groupby("a")["b", "x"])
+        self.assertRaises(KeyError, lambda: psdf.groupby("a")[["b", "x"]])
+
+
+class RaisesTests(RaisesTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_raises import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_rank.py b/python/pyspark/pandas/tests/groupby/test_rank.py
new file mode 100644
index 0000000000000..f1efbd3f8a4a5
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_rank.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import numpy as np
+import pyspark.pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class RankTestsMixin:
+    def test_rank(self):
+        pdf = pd.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5, 6] * 3,
+                "b": [1, 1, 2, 3, 5, 8] * 3,
+                "c": [1, 4, 9, 16, 25, 36] * 3,
+            },
+            index=np.random.rand(6 * 3),
+        )
+        psdf = ps.from_pandas(pdf)
+
+        self.assert_eq(psdf.groupby("b").rank().sort_index(), pdf.groupby("b").rank().sort_index())
+        self.assert_eq(
+            psdf.groupby(["a", "b"]).rank().sort_index(),
+            pdf.groupby(["a", "b"]).rank().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby(["b"])["a"].rank().sort_index(),
+            pdf.groupby(["b"])["a"].rank().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
+            pdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby(psdf.b // 5).rank().sort_index(),
+            pdf.groupby(pdf.b // 5).rank().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby(psdf.b // 5)["a"].rank().sort_index(),
+            pdf.groupby(pdf.b // 5)["a"].rank().sort_index(),
+        )
+
+        self.assert_eq(psdf.groupby("b").rank().sum(), pdf.groupby("b").rank().sum())
+        self.assert_eq(psdf.groupby(["b"])["a"].rank().sum(), pdf.groupby(["b"])["a"].rank().sum())
+
+        # multi-index columns
+        columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
+        pdf.columns = columns
+        psdf.columns = columns
+
+        self.assert_eq(
+            psdf.groupby(("x", "b")).rank().sort_index(),
+            pdf.groupby(("x", "b")).rank().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
+            pdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
+        )
+
+
+class RankTests(RankTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_rank import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_size.py b/python/pyspark/pandas/tests/groupby/test_size.py
new file mode 100644
index 0000000000000..8d187ba377a37
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_size.py
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import pyspark.pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class SizeTestsMixin:
+    def test_size(self):
+        pdf = pd.DataFrame({"A": [1, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, 3]})
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(psdf.groupby("A").size().sort_index(), pdf.groupby("A").size().sort_index())
+        self.assert_eq(
+            psdf.groupby("A")["B"].size().sort_index(), pdf.groupby("A")["B"].size().sort_index()
+        )
+        self.assert_eq(
+            psdf.groupby("A")[["B"]].size().sort_index(),
+            pdf.groupby("A")[["B"]].size().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby(["A", "B"]).size().sort_index(),
+            pdf.groupby(["A", "B"]).size().sort_index(),
+        )
+
+        # multi-index columns
+        columns = pd.MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
+        pdf.columns = columns
+        psdf.columns = columns
+
+        self.assert_eq(
+            psdf.groupby(("X", "A")).size().sort_index(),
+            pdf.groupby(("X", "A")).size().sort_index(),
+        )
+        self.assert_eq(
+            psdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
+            pdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
+        )
+
+
+class SizeTests(SizeTestsMixin, PandasOnSparkTestCase):
+    pass
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_size import *  # noqa: F401
+
+    try:
+        import xmlrunner
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/pandas/tests/groupby/test_value_counts.py b/python/pyspark/pandas/tests/groupby/test_value_counts.py
new file mode 100644
index 0000000000000..5ed2d1b0583dc
--- /dev/null
+++ b/python/pyspark/pandas/tests/groupby/test_value_counts.py
@@ -0,0 +1,104 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+import pandas as pd
+import numpy as np
+import pyspark.pandas as ps
+from pyspark.testing.pandasutils import PandasOnSparkTestCase
+
+
+class ValueCountsTestsMixin:
+    """
+    Parity checks for grouped ``value_counts``.
+
+    pandas-on-Spark results are compared with pandas on a frame that holds NaN in both
+    the grouping column and the counted column. Variations covered:
+
+    - ``dropna`` on ``groupby`` and on ``value_counts``
+    - ``sort`` / ``ascending`` on ``value_counts``
+    - a Series grouped by a Series, with ``rename()`` called on either or both
+    """
+
+    def test_value_counts(self):
+        expected = pd.DataFrame(
+            {"A": [np.nan, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, np.nan]}, columns=["A", "B"]
+        )
+        actual = ps.from_pandas(expected)
+
+        def check(counts, **assert_kwargs):
+            # ``counts`` maps a frame to its grouped value counts; both results are sorted
+            # by index, and ``assert_kwargs`` is forwarded to ``assert_eq`` unchanged.
+            got = counts(actual).sort_index()
+            want = counts(expected).sort_index()
+            self.assert_eq(got, want, **assert_kwargs)
+
+        # Column "B" grouped by column "A", varying ``dropna``.
+        check(lambda df: df.groupby("A")["B"].value_counts())
+        check(
+            lambda df: df.groupby("A")["B"].value_counts(dropna=False),
+            almost=True,
+        )
+        check(
+            lambda df: df.groupby("A", dropna=False)["B"].value_counts(dropna=False),
+            # Returns are the same considering values and types,
+            # disable check_exact to pass the assert_eq
+            check_exact=False,
+        )
+
+        # Same grouping with an explicit sort order.
+        check(lambda df: df.groupby("A")["B"].value_counts(sort=True, ascending=False))
+        check(
+            lambda df: df.groupby("A")["B"].value_counts(
+                sort=True, ascending=False, dropna=False
+            ),
+            almost=True,
+        )
+        check(
+            lambda df: df.groupby("A")["B"].value_counts(
+                sort=True, ascending=True, dropna=False
+            ),
+            almost=True,
+        )
+
+        # Series "B" grouped by Series "A", calling ``rename()`` on either or both.
+        check(lambda df: df.B.rename().groupby(df.A).value_counts())
+        check(
+            lambda df: df.B.rename().groupby(df.A, dropna=False).value_counts(),
+            # Returns are the same considering values and types,
+            # disable check_exact to pass the assert_eq
+            check_exact=False,
+        )
+        check(lambda df: df.B.groupby(df.A.rename()).value_counts())
+        check(lambda df: df.B.rename().groupby(df.A.rename()).value_counts())
+
+
+class ValueCountsTests(ValueCountsTestsMixin, PandasOnSparkTestCase):
+    """Runs the ``ValueCountsTestsMixin`` checks as a ``PandasOnSparkTestCase``."""
+
+
+if __name__ == "__main__":
+    from pyspark.pandas.tests.groupby.test_value_counts import *  # noqa: F401
+    # Use the XML runner when xmlrunner can be imported; otherwise testRunner stays None.
+    try:
+        import xmlrunner
+        # The runner is configured with target/test-reports as its output location.
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)