From b5f01b7e817f565164c1118e71c0afe8ba4c8e32 Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Thu, 19 Sep 2019 14:52:27 -0500
Subject: [PATCH 1/7] Added failing test for groupby mutation

---
 pandas/tests/groupby/test_groupby.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index bec5cbc5fecb8..8b3fad8ba911d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -433,6 +433,23 @@ def test_frame_groupby_columns(tsframe):
         assert len(v.columns) == 2
 
 
+def test_frame_groupby_avoids_mutate():
+    # GH28523
+    df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
+    grouped = df.groupby("A")
+
+    expected = grouped.apply(lambda x: x)
+
+    funcs = ["sum", "prod", "min", "max", "last", "first"]
+    for fn in funcs:
+        func = getattr(grouped, fn)
+        func()
+
+    result = grouped.apply(lambda x: x)
+
+    assert tm.assert_frame_equal(expected, result)
+
+
 def test_frame_set_name_single(df):
     grouped = df.groupby("A")
 

From 1dbb12dcc83429477897d219bbdd0816d3752cfe Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Thu, 19 Sep 2019 15:18:26 -0500
Subject: [PATCH 2/7] Switched implementation to use _group_selection_context

---
 pandas/core/groupby/groupby.py       | 66 ++++++++++++++--------------
 pandas/tests/groupby/test_groupby.py | 22 +++++++---
 2 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e010e615e176e..68ef0cf57faed 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1364,27 +1364,27 @@ def f(self, **kwargs):
                 if "min_count" not in kwargs:
                     kwargs["min_count"] = min_count
 
-                self._set_group_selection()
+                with _group_selection_context(self):
 
-                # try a cython aggregation if we can
-                try:
-                    return self._cython_agg_general(alias, alt=npfunc, **kwargs)
-                except AssertionError as e:
-                    raise SpecificationError(str(e))
-                except Exception:
-                    pass
+                    # try a cython aggregation if we can
+                    try:
+                        return self._cython_agg_general(alias, alt=npfunc, **kwargs)
+                    except AssertionError as e:
+                        raise SpecificationError(str(e))
+                    except Exception:
+                        pass
 
-                # apply a non-cython aggregation
-                result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
+                    # apply a non-cython aggregation
+                    result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
 
-                # coerce the resulting columns if we can
-                if isinstance(result, DataFrame):
-                    for col in result.columns:
-                        result[col] = self._try_cast(result[col], self.obj[col])
-                else:
-                    result = self._try_cast(result, self.obj)
+                    # coerce the resulting columns if we can
+                    if isinstance(result, DataFrame):
+                        for col in result.columns:
+                            result[col] = self._try_cast(result[col], self.obj[col])
+                    else:
+                        result = self._try_cast(result, self.obj)
 
-                return result
+                    return result
 
             set_function_name(f, name, cls)
 
@@ -1757,28 +1757,30 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
                 nth_values = list(set(n))
 
             nth_array = np.array(nth_values, dtype=np.intp)
-            self._set_group_selection()
 
-            mask_left = np.in1d(self._cumcount_array(), nth_array)
-            mask_right = np.in1d(self._cumcount_array(ascending=False) + 1, -nth_array)
-            mask = mask_left | mask_right
+            with _group_selection_context(self):
+                mask_left = np.in1d(self._cumcount_array(), nth_array)
+                mask_right = np.in1d(
+                    self._cumcount_array(ascending=False) + 1, -nth_array
+                )
+                mask = mask_left | mask_right
 
-            ids, _, _ = self.grouper.group_info
+                ids, _, _ = self.grouper.group_info
 
-            # Drop NA values in grouping
-            mask = mask & (ids != -1)
+                # Drop NA values in grouping
+                mask = mask & (ids != -1)
 
-            out = self._selected_obj[mask]
-            if not self.as_index:
-                return out
+                out = self._selected_obj[mask]
+                if not self.as_index:
+                    return out
 
-            result_index = self.grouper.result_index
-            out.index = result_index[ids[mask]]
+                result_index = self.grouper.result_index
+                out.index = result_index[ids[mask]]
 
-            if not self.observed and isinstance(result_index, CategoricalIndex):
-                out = out.reindex(result_index)
+                if not self.observed and isinstance(result_index, CategoricalIndex):
+                    out = out.reindex(result_index)
 
-            return out.sort_index() if self.sort else out
+                return out.sort_index() if self.sort else out
 
         # dropna is truthy
         if isinstance(n, valid_containers):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 8b3fad8ba911d..c9e5b591c7e79 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -433,21 +433,31 @@ def test_frame_groupby_columns(tsframe):
         assert len(v.columns) == 2
 
 
-def test_frame_groupby_avoids_mutate():
+@pytest.mark.parametrize(
+    "func, args",
+    [
+        ("sum", []),
+        ("prod", []),
+        ("min", []),
+        ("max", []),
+        ("nth", [0]),
+        ("last", []),
+        ("first", []),
+    ],
+)
+def test_frame_groupby_avoids_mutate(func, args):
     # GH28523
     df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
     grouped = df.groupby("A")
 
     expected = grouped.apply(lambda x: x)
 
-    funcs = ["sum", "prod", "min", "max", "last", "first"]
-    for fn in funcs:
-        func = getattr(grouped, fn)
-        func()
+    fn = getattr(grouped, func)
+    fn(*args)
 
     result = grouped.apply(lambda x: x)
 
-    assert tm.assert_frame_equal(expected, result)
+    tm.assert_frame_equal(expected, result)
 
 
 def test_frame_set_name_single(df):

From bcab4a0726ec528df5f38b998b6dc8a5c29a0d3d Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Thu, 19 Sep 2019 15:38:04 -0500
Subject: [PATCH 3/7] added whatsnew entry

---
 doc/source/whatsnew/v1.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 5e6a9093554eb..8572403e5214e 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -240,6 +240,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`)
 - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
+- Bug in :meth:`DataFrameGroupby` causing unexpected mutations of the groupby object (:issue:`28523`)
 
 Reshaping
 ^^^^^^^^^

From ca26b64bc5ee3b0e9dce9e7e820c3d1d48ebbba6 Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Thu, 19 Sep 2019 19:05:45 -0500
Subject: [PATCH 4/7] updated test, removed last instance of not using context
 manager to fix funcs

---
 pandas/core/groupby/groupby.py       | 91 ++++++++++++++--------------
 pandas/tests/groupby/test_groupby.py | 16 +----
 2 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 68ef0cf57faed..c53f3b58ac4cb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -606,56 +606,55 @@ def _make_wrapper(self, name):
             )
             raise AttributeError(msg)
 
-        self._set_group_selection()
-
-        # need to setup the selection
-        # as are not passed directly but in the grouper
-        f = getattr(self._selected_obj, name)
-        if not isinstance(f, types.MethodType):
-            return self.apply(lambda self: getattr(self, name))
-
-        f = getattr(type(self._selected_obj), name)
-
-        def wrapper(*args, **kwargs):
-            # a little trickery for aggregation functions that need an axis
-            # argument
-            kwargs_with_axis = kwargs.copy()
-            if "axis" not in kwargs_with_axis or kwargs_with_axis["axis"] is None:
-                kwargs_with_axis["axis"] = self.axis
-
-            def curried_with_axis(x):
-                return f(x, *args, **kwargs_with_axis)
-
-            def curried(x):
-                return f(x, *args, **kwargs)
-
-            # preserve the name so we can detect it when calling plot methods,
-            # to avoid duplicates
-            curried.__name__ = curried_with_axis.__name__ = name
-
-            # special case otherwise extra plots are created when catching the
-            # exception below
-            if name in base.plotting_methods:
-                return self.apply(curried)
+        with _group_selection_context(self):
+            # need to setup the selection
+            # as are not passed directly but in the grouper
+            f = getattr(self._selected_obj, name)
+            if not isinstance(f, types.MethodType):
+                return self.apply(lambda self: getattr(self, name))
+
+            f = getattr(type(self._selected_obj), name)
+
+            def wrapper(*args, **kwargs):
+                # a little trickery for aggregation functions that need an axis
+                # argument
+                kwargs_with_axis = kwargs.copy()
+                if "axis" not in kwargs_with_axis or kwargs_with_axis["axis"] is None:
+                    kwargs_with_axis["axis"] = self.axis
+
+                def curried_with_axis(x):
+                    return f(x, *args, **kwargs_with_axis)
+
+                def curried(x):
+                    return f(x, *args, **kwargs)
+
+                # preserve the name so we can detect it when calling plot methods,
+                # to avoid duplicates
+                curried.__name__ = curried_with_axis.__name__ = name
+
+                # special case otherwise extra plots are created when catching the
+                # exception below
+                if name in base.plotting_methods:
+                    return self.apply(curried)
 
-            try:
-                return self.apply(curried_with_axis)
-            except Exception:
                 try:
-                    return self.apply(curried)
+                    return self.apply(curried_with_axis)
                 except Exception:
-
-                    # related to : GH3688
-                    # try item-by-item
-                    # this can be called recursively, so need to raise
-                    # ValueError
-                    # if we don't have this method to indicated to aggregate to
-                    # mark this column as an error
                     try:
-                        return self._aggregate_item_by_item(name, *args, **kwargs)
-                    except AttributeError:
-                        # e.g. SparseArray has no flags attr
-                        raise ValueError
+                        return self.apply(curried)
+                    except Exception:
+
+                        # related to : GH3688
+                        # try item-by-item
+                        # this can be called recursively, so need to raise
+                        # ValueError
+                        # if we don't have this method to indicated to aggregate to
+                        # mark this column as an error
+                        try:
+                            return self._aggregate_item_by_item(name, *args, **kwargs)
+                        except AttributeError:
+                            # e.g. SparseArray has no flags attr
+                            raise ValueError
 
         return wrapper
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c9e5b591c7e79..ccacf4ae7ce30 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -433,25 +433,15 @@ def test_frame_groupby_columns(tsframe):
         assert len(v.columns) == 2
 
 
-@pytest.mark.parametrize(
-    "func, args",
-    [
-        ("sum", []),
-        ("prod", []),
-        ("min", []),
-        ("max", []),
-        ("nth", [0]),
-        ("last", []),
-        ("first", []),
-    ],
-)
-def test_frame_groupby_avoids_mutate(func, args):
+def test_frame_groupby_avoids_mutate(reduction_func):
     # GH28523
+    func = reduction_func
     df = pd.DataFrame({"A": ["foo", "bar", "foo", "bar"], "B": [1, 2, 3, 4]})
     grouped = df.groupby("A")
 
     expected = grouped.apply(lambda x: x)
 
+    args = {"nth": [0], "quantile": [0.5]}.get(func, [])
     fn = getattr(grouped, func)
     fn(*args)
 

From f24c05ca22f7f36f296e94ebf1d1d3648f7587e5 Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Sun, 20 Oct 2019 21:31:15 -0500
Subject: [PATCH 5/7] Missed a merge

---
 pandas/core/groupby/groupby.py | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index db5d0d1bf0d56..414b001a702b2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1335,32 +1335,21 @@ def f(self, **kwargs):
                     kwargs["min_count"] = min_count
 
                 with _group_selection_context(self):
-
-<<<<<<< HEAD
                     # try a cython aggregation if we can
                     try:
                         return self._cython_agg_general(alias, alt=npfunc, **kwargs)
-                    except AssertionError as e:
-                        raise SpecificationError(str(e))
-                    except Exception:
-                        pass
-=======
-                # try a cython aggregation if we can
-                try:
-                    return self._cython_agg_general(alias, alt=npfunc, **kwargs)
-                except DataError:
-                    pass
-                except NotImplementedError as err:
-                    if "function is not implemented for this dtype" in str(err):
-                        # raised in _get_cython_function, in some cases can
-                        #  be trimmed by implementing cython funcs for more dtypes
+                    except DataError:
                         pass
-                    elif "decimal does not support skipna=True" in str(err):
-                        # FIXME: kludge for test_decimal:test_in_numeric_groupby
-                        pass
-                    else:
-                        raise
->>>>>>> upstream/master
+                    except NotImplementedError as err:
+                        if "function is not implemented for this dtype" in str(err):
+                            # raised in _get_cython_function, in some cases can
+                            #  be trimmed by implementing cython funcs for more dtypes
+                            pass
+                        elif "decimal does not support skipna=True" in str(err):
+                            # FIXME: kludge for test_decimal:test_in_numeric_groupby
+                            pass
+                        else:
+                            raise
 
                     # apply a non-cython aggregation
                     result = self.aggregate(lambda x: npfunc(x, axis=self.axis))

From 5cc3cb17bc88664d09b098d22e83532896d7519b Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Sun, 20 Oct 2019 21:44:46 -0500
Subject: [PATCH 6/7] Fixed two tests

---
 pandas/tests/groupby/aggregate/test_other.py | 4 ++--
 pandas/tests/groupby/test_grouping.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 5dad868c8c3aa..e89d5cfeb10cd 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -482,13 +482,13 @@ def test_agg_timezone_round_trip():
     assert ts == grouped.first()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 0]
+    assert ts == grouped.apply(lambda x: x.iloc[0]).drop("A", 1).iloc[0, 0]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 0]
+    assert ts == grouped.apply(lambda x: x.iloc[-1]).drop("A", 1).iloc[0, 0]
 
 
 def test_sum_uint64_overflow():
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 403f5f11ee768..0adc1cda6c499 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -172,7 +172,7 @@ def test_grouper_creation_bug(self):
         result = g.sum()
         assert_frame_equal(result, expected)
 
-        result = g.apply(lambda x: x.sum())
+        result = g.apply(lambda x: x.sum()).drop("A", 1)
         assert_frame_equal(result, expected)
 
         g = df.groupby(pd.Grouper(key="A", axis=0))

From 30509636e34bfaa79eddd9cfad785d716fc9a8bb Mon Sep 17 00:00:00 2001
From: Chris Zimmerman <chris-zimmerman@live.com>
Date: Mon, 21 Oct 2019 08:38:27 -0500
Subject: [PATCH 7/7] Another context needed to be set

---
 pandas/core/groupby/groupby.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 414b001a702b2..34e78ff82acee 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -648,8 +648,9 @@ def curried(x):
                     #  in tests.groupby.test_function.test_non_cython_api
                     raise ValueError
 
-            wrapper.__name__ = name
-        return wrapper
+                wrapper.__name__ = name
+
+            return wrapper
 
     def get_group(self, name, obj=None):
         """
@@ -720,7 +721,9 @@ def f(g):
             f = func
 
         # ignore SettingWithCopy here in case the user mutates
-        with option_context("mode.chained_assignment", None):
+        with option_context(
+            "mode.chained_assignment", None
+        ) as _, _group_selection_context(self) as _:
             try:
                 result = self._python_apply_general(f)
             except TypeError:
@@ -731,9 +734,7 @@ def f(g):
                 # except if the udf is trying an operation that
                 # fails on *some* columns, e.g. a numeric operation
                 # on a string grouper column
-
-                with _group_selection_context(self):
-                    return self._python_apply_general(f)
+                return self._python_apply_general(f)
 
         return result