From 121e2dfb9cd667157ae15654265a9a8976f34a31 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 31 Jul 2021 12:09:59 -0700
Subject: [PATCH 1/3] DEPR: dropping nuisance columns in rolling methods

---
 pandas/core/window/rolling.py       | 11 +++++++++
 pandas/tests/window/test_api.py     |  3 ++-
 pandas/tests/window/test_ewm.py     |  5 +++--
 pandas/tests/window/test_groupby.py | 34 +++++++++++++++-------------
 pandas/tests/window/test_numba.py   | 35 ++++++++++++++++++++---------
 5 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 8a253726ab0b6..fc03604603c36 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -32,6 +32,7 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import doc
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     ensure_float64,
@@ -436,6 +437,16 @@ def hfunc2d(values: ArrayLike) -> ArrayLike:
             new_mgr = mgr.apply_2d(hfunc2d, ignore_failures=True)
         else:
             new_mgr = mgr.apply(hfunc, ignore_failures=True)
+
+        if 0 != len(new_mgr.items) != len(mgr.items):
+            # ignore_failures dropped nuisance columns
+            warnings.warn(
+                "Dropping of nuisance columns in rolling operations "
+                "is deprecated; in a future version this will raise TypeError. "
+                "Select only valid columns before calling the operation.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         out = obj._constructor(new_mgr)
 
         return self._resolve_output(out, obj)
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index e70d079739003..f39f5792decc3 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -68,7 +68,8 @@ def tests_skip_nuisance():
 def test_skip_sum_object_raises():
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3)
-    result = r.sum()
+    with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+        result = r.sum()
     expected = DataFrame(
         {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
         columns=list("AB"),
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
index 8da902ea830d1..e36d2f24a2f15 100644
--- a/pandas/tests/window/test_ewm.py
+++ b/pandas/tests/window/test_ewm.py
@@ -116,8 +116,9 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
     data = np.arange(10.0)
     data[::2] = np.nan
     df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
-    result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
-    expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
+    with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+        result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
+        expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index 03b43026c9a6c..61031b8d0b931 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -923,7 +923,11 @@ def test_methods(self, method, expected_data):
         )
         tm.assert_frame_equal(result, expected)
 
-        expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)())
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            expected = df.groupby("A").apply(
+                lambda x: getattr(x.ewm(com=1.0), method)()
+            )
+
         # There may be a bug in the above statement; not returning the correct index
         tm.assert_frame_equal(result.reset_index(drop=True), expected)
 
@@ -955,7 +959,8 @@ def test_pairwise_methods(self, method, expected_data):
     def test_times(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
         expected = DataFrame(
             {
                 "B": [
@@ -992,22 +997,21 @@ def test_times(self, times_frame):
     def test_times_vs_apply(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
-        expected = (
-            times_frame.groupby("A")
-            .apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
-            .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
-            .reset_index(drop=True)
-        )
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
+            expected = (
+                times_frame.groupby("A")
+                .apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
+                .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
+                .reset_index(drop=True)
+            )
         tm.assert_frame_equal(result.reset_index(drop=True), expected)
 
     def test_times_array(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
-        expected = (
-            times_frame.groupby("A")
-            .ewm(halflife=halflife, times=times_frame["C"].values)
-            .mean()
-        )
+        gb = times_frame.groupby("A")
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            result = gb.ewm(halflife=halflife, times="C").mean()
+            expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index a8ec9086e6b02..258312235e2c4 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -170,26 +170,38 @@ def test_invalid_engine_kwargs(self, grouper):
                 engine="cython", engine_kwargs={"nopython": True}
             )
 
-    @pytest.mark.parametrize(
-        "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
-    )
+    @pytest.mark.parametrize("grouper", ["None", "groupby"])
     def test_cython_vs_numba(
         self, grouper, nogil, parallel, nopython, ignore_na, adjust
     ):
+        if grouper == "None":
+            grouper = lambda x: x
+            warn = FutureWarning
+        else:
+            grouper = lambda x: x.groupby("A")
+            warn = None
+
         df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
         ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-        result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
-        expected = ewm.mean(engine="cython")
+        with tm.assert_produces_warning(warn, match="nuisance"):
+            result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
+            expected = ewm.mean(engine="cython")
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
-    )
+    @pytest.mark.parametrize("grouper", ["None", "groupby"])
     def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
         # GH 40951
+
+        if grouper == "None":
+            grouper = lambda x: x
+            warn = FutureWarning
+        else:
+            grouper = lambda x: x.groupby("A")
+            warn = None
+
         halflife = "23 days"
         times = to_datetime(
             [
@@ -207,8 +219,11 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_
         )
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-        result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
-        expected = ewm.mean(engine="cython")
+
+        # TODO: why only in these cases?
+        with tm.assert_produces_warning(warn, match="nuisance"):
+            result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
+            expected = ewm.mean(engine="cython")
 
         tm.assert_frame_equal(result, expected)
 

From 0c8f3329ff5a932d9927d5656abd60143f99918e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 31 Jul 2021 12:12:22 -0700
Subject: [PATCH 2/3] whatsnew

---
 doc/source/whatsnew/v1.4.0.rst      | 1 +
 pandas/core/window/rolling.py       | 2 +-
 pandas/tests/window/test_api.py     | 1 +
 pandas/tests/window/test_ewm.py     | 1 +
 pandas/tests/window/test_groupby.py | 4 ++++
 pandas/tests/window/test_numba.py   | 2 ++
 6 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index ce0158b05c2ab..52ee4d65045e2 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -159,6 +159,7 @@ Deprecations
 - Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
 - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
 - Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
+- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index fc03604603c36..104e09b2a178b 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -439,7 +439,7 @@ def hfunc2d(values: ArrayLike) -> ArrayLike:
             new_mgr = mgr.apply(hfunc, ignore_failures=True)
 
         if 0 != len(new_mgr.items) != len(mgr.items):
-            # ignore_failures dropped nuisance columns
+            # GH#42738 ignore_failures dropped nuisance columns
             warnings.warn(
                 "Dropping of nuisance columns in rolling operations "
                 "is deprecated; in a future version this will raise TypeError. "
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index f39f5792decc3..eadd72d936678 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -69,6 +69,7 @@ def test_skip_sum_object_raises():
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3)
     with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+        # GH#42738
         result = r.sum()
     expected = DataFrame(
         {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
index e36d2f24a2f15..011f44a674014 100644
--- a/pandas/tests/window/test_ewm.py
+++ b/pandas/tests/window/test_ewm.py
@@ -117,6 +117,7 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
     data[::2] = np.nan
     df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
     with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+        # GH#42738
         result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
         expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index 61031b8d0b931..2523ec585a491 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -924,6 +924,7 @@ def test_methods(self, method, expected_data):
         tm.assert_frame_equal(result, expected)
 
         with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
             expected = df.groupby("A").apply(
                 lambda x: getattr(x.ewm(com=1.0), method)()
             )
@@ -960,6 +961,7 @@ def test_times(self, times_frame):
         # GH 40951
         halflife = "23 days"
         with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
             result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
         expected = DataFrame(
             {
@@ -998,6 +1000,7 @@ def test_times_vs_apply(self, times_frame):
         # GH 40951
         halflife = "23 days"
         with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
             result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
             expected = (
                 times_frame.groupby("A")
@@ -1012,6 +1015,7 @@ def test_times_array(self, times_frame):
         halflife = "23 days"
         gb = times_frame.groupby("A")
         with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
             result = gb.ewm(halflife=halflife, times="C").mean()
             expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index 258312235e2c4..586ca7ee259f8 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -186,6 +186,7 @@ def test_cython_vs_numba(
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
         with tm.assert_produces_warning(warn, match="nuisance"):
+            # GH#42738
             result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
             expected = ewm.mean(engine="cython")
 
@@ -222,6 +223,7 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_
 
         # TODO: why only in these cases?
         with tm.assert_produces_warning(warn, match="nuisance"):
+            # GH#42738
             result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
             expected = ewm.mean(engine="cython")
 

From 083d21bb77bb2ff497bb5dbc9a743d722d9dba55 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 8 Aug 2021 21:03:15 -0700
Subject: [PATCH 3/3] add dropped columns to warning message

---
 pandas/core/window/rolling.py   | 4 +++-
 pandas/tests/window/test_api.py | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 47367ba11f104..d4c0eb946505d 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -440,10 +440,12 @@ def hfunc2d(values: ArrayLike) -> ArrayLike:
 
         if 0 != len(new_mgr.items) != len(mgr.items):
             # GH#42738 ignore_failures dropped nuisance columns
+            dropped = mgr.items.difference(new_mgr.items)
             warnings.warn(
                 "Dropping of nuisance columns in rolling operations "
                 "is deprecated; in a future version this will raise TypeError. "
-                "Select only valid columns before calling the operation.",
+                "Select only valid columns before calling the operation. "
+                f"Dropped columns were {dropped}",
                 FutureWarning,
                 stacklevel=find_stack_level(),
             )
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index eadd72d936678..7a5fcebfd23d7 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -68,7 +68,8 @@ def tests_skip_nuisance():
 def test_skip_sum_object_raises():
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3)
-    with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+    msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
         # GH#42738
         result = r.sum()
     expected = DataFrame(