From d03142e127407d29a713192a9530dccf0c80f6dc Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Fri, 22 Nov 2019 15:30:16 -0500 Subject: [PATCH 1/7] BUG: add reset logic for Grouper if new obj is passed in (#26564) --- pandas/core/groupby/grouper.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 308d4d1864bdd..4b6e46efc0284 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -109,6 +109,13 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.axis = axis self.sort = sort + self._reset_grouper() + + def _reset_grouper(self): + """ + Ensures Grouper object can be reused + See https://bit.ly/2D4Yd6V + """ self.grouper = None self.obj = None self.indexer = None @@ -155,6 +162,8 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): whether the resulting grouper should be sorted """ assert obj is not None + if not obj.equals(self.obj): + self._reset_grouper() if self.key is not None and self.level is not None: raise ValueError("The Grouper cannot specify both a key and a level!") From 05f649e2ed841e3b1d0536a5157f0f9a88e8d659 Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Fri, 22 Nov 2019 18:57:06 -0500 Subject: [PATCH 2/7] BUG: adding test and addressing a comment on shortened URL (#26564) --- pandas/core/groupby/grouper.py | 2 +- .../tests/resample/test_resampler_grouper.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4b6e46efc0284..4d66d1ee89ab5 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -114,7 +114,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): def _reset_grouper(self): """ Ensures Grouper object can be reused - See https://bit.ly/2D4Yd6V + See https://github.com/pandas-dev/pandas/issues/26564 """ self.grouper = None self.obj = None diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 7efc6b0d466b9..5dd6ab5fd22c5 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -275,3 +275,23 @@ def test_median_duplicate_columns(): result = df.resample("5s").median() expected.columns = result.columns tm.assert_frame_equal(result, expected) + + +def test_same_grouper_on_different_frames(): + + df1 = pd.DataFrame( + [ + ["a", 1, 2, "05/29/2019"], + ["a", 4, 5, "05/28/2019"], + ["b", 2, 3, "05/27/2019"], + ], + columns=["type", "num1", "num2", "date"], + ).assign(date=lambda df: pd.to_datetime(df["date"])) + df2 = pd.DataFrame(columns=["type", "num1", "num2", "date"]).assign( + date=lambda df: pd.to_datetime(df["date"]) + ) + + groupbys = ["type", pd.Grouper(key="date", freq="1D")] + + df1.groupby(groupbys).head() + df2.groupby(groupbys).head() From 004373974dd4df67fa512418b3c03c5ed5834db6 Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Mon, 25 Nov 2019 10:37:05 -0500 Subject: [PATCH 3/7] BUG: make test df simpler and change to agg funcs (#29800) --- .../tests/resample/test_resampler_grouper.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 5dd6ab5fd22c5..ea5469076ab1d 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -280,18 +280,14 @@ def test_median_duplicate_columns(): def test_same_grouper_on_different_frames(): df1 = pd.DataFrame( - [ - ["a", 1, 2, "05/29/2019"], - ["a", 4, 5, "05/28/2019"], - ["b", 2, 3, "05/27/2019"], - ], - columns=["type", "num1", "num2", "date"], - ).assign(date=lambda df: pd.to_datetime(df["date"])) - df2 = pd.DataFrame(columns=["type", "num1", "num2", "date"]).assign( - date=lambda df: pd.to_datetime(df["date"]) + [["a", 1, 2], ["a", 4, 5], ["b", 2, 3]], columns=["type", "num1", "num2"], ) + df1["date"] = pd.to_datetime(["05/29/2019", "05/28/2019", "05/27/2019"]) + + df2 = pd.DataFrame([["c", 6, 7], ["d", 8, 9]], columns=["type", "num1", "num2"],) + df2["date"] = pd.to_datetime(["02/12/2018", "03/13/2018"]) groupbys = ["type", pd.Grouper(key="date", freq="1D")] - df1.groupby(groupbys).head() - df2.groupby(groupbys).head() + df1.groupby(groupbys).sum() + df2.groupby(groupbys).count() From 8dea17d94061e29de73c477da87f9d212385d6fd Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Thu, 6 Feb 2020 14:23:48 -0500 Subject: [PATCH 4/7] BUG: conform to standard test format (#29800) --- pandas/tests/resample/test_resampler_grouper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b084650353d15..53f39dbcd535a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -310,4 +310,11 @@ def test_same_grouper_on_different_frames(): groupbys = ["type", pd.Grouper(key="date", freq="1D")] df1.groupby(groupbys).sum() - df2.groupby(groupbys).count() + result = df2.groupby(groupbys).count() + + expected = pd.DataFrame({'num1': {('c', Timestamp('2018-02-12 00:00:00', freq='D')): 1, + ('d', Timestamp('2018-03-13 00:00:00', freq='D')): 1}, + 'num2': {('c', Timestamp('2018-02-12 00:00:00', freq='D')): 1, + ('d', Timestamp('2018-03-13 00:00:00', freq='D')): 1}}) + expected.index.set_names(["type", "date"], inplace=True) + tm.assert_frame_equal(result, expected) From 5a1889b85dbd87e0880b1f46e13a20f7a361ef5f Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Thu, 6 Feb 2020 15:00:31 -0500 Subject: [PATCH 5/7] BUG: blacken test (#29800) --- pandas/tests/resample/test_resampler_grouper.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 53f39dbcd535a..73fff1eb3da0e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -312,9 +312,17 @@ def test_same_grouper_on_different_frames(): df1.groupby(groupbys).sum() result = df2.groupby(groupbys).count() - expected = pd.DataFrame({'num1': {('c', Timestamp('2018-02-12 00:00:00', freq='D')): 1, - ('d', Timestamp('2018-03-13 00:00:00', freq='D')): 1}, - 'num2': {('c', Timestamp('2018-02-12 00:00:00', freq='D')): 1, - ('d', Timestamp('2018-03-13 00:00:00', freq='D')): 1}}) + expected = pd.DataFrame( + { + "num1": { + ("c", Timestamp("2018-02-12 00:00:00", freq="D")): 1, + ("d", Timestamp("2018-03-13 00:00:00", freq="D")): 1, + }, + "num2": { + ("c", Timestamp("2018-02-12 00:00:00", freq="D")): 1, + ("d", Timestamp("2018-03-13 00:00:00", freq="D")): 1, + }, + } + ) expected.index.set_names(["type", "date"], inplace=True) tm.assert_frame_equal(result, expected) From 12e3b4f9fe6dca2c2e56534b10d0771f48545553 Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Fri, 7 Feb 2020 10:38:36 -0500 Subject: [PATCH 6/7] BUG: remove groupby fix and xfail test (#29800) --- pandas/core/groupby/grouper.py | 10 ---------- pandas/tests/resample/test_resampler_grouper.py | 2 ++ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 59992177d8ade..2d723865f08ce 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -107,14 +107,6 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False): self.freq = freq self.axis = axis self.sort = sort - - self._reset_grouper() - - def _reset_grouper(self): - """ - Ensures Grouper object can be reused - See https://github.com/pandas-dev/pandas/issues/26564 - """ self.grouper = None self.obj = None self.indexer = None @@ -161,8 +153,6 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): whether the resulting grouper should be sorted """ assert obj is not None - if not obj.equals(self.obj): - self._reset_grouper() if self.key is not None and self.level is not None: raise ValueError("The Grouper cannot specify both a key and a level!") diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 73fff1eb3da0e..1ec43c5de3af4 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -1,5 +1,6 @@ from textwrap import dedent +import pytest import numpy as np from pandas.util._test_decorators import async_mark @@ -297,6 +298,7 @@ def test_median_duplicate_columns(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(reason="marked as xfail for: #26564") def test_same_grouper_on_different_frames(): df1 = pd.DataFrame( From 079c847588b4ddc17b322ae1d3d8ab06e6677e81 Mon Sep 17 00:00:00 2001 From: Ali Chaudry Date: Fri, 7 Feb 2020 12:40:32 -0500 Subject: [PATCH 7/7] BUG: fix import sort order (#29800) --- pandas/tests/resample/test_resampler_grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 1ec43c5de3af4..89384a7ad8d75 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -1,7 +1,7 @@ from textwrap import dedent -import pytest import numpy as np +import pytest from pandas.util._test_decorators import async_mark