From 59e636785ac8643cf59e66974af8a9539d38b1dd Mon Sep 17 00:00:00 2001 From: Niklas Weber Date: Sat, 20 Jun 2020 16:22:59 +0200 Subject: [PATCH 1/4] initial version of regression test for GH#34871 --- pandas/tests/frame/methods/test_replace.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 3bcc26e85e347..9d7ba21c8339c 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1403,3 +1403,23 @@ def test_replace_with_duplicate_columns(self, replacement): result["B"] = result["B"].replace(7, replacement) tm.assert_frame_equal(result, expected) + + def test_replace_period_ignore_float(self): + """ + Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df + with a Period column the old, faulty behavior is to raise TypeError + (reported for Pandas 1.0.4). The intended behavior is to just ignore + that column. This has been fixed in newer versions. This is a regression + test that triggers for the old, broken behavior, i.e. + TypeError is raised + """ + df = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + + # buggy behavior is to raise: + # TypeError: 'value' should be a 'Period', 'NaT', or array of those. + # Got 'float' instead + df_after_replace = df.replace(1.0, 0.0) + + # 'replace()' should be ignored for these inputs, + # so new df should equal old df + tm.assert_frame_equal(df, df_after_replace) From 81b60587040c1f9a01ee4973f53b7297c5084d99 Mon Sep 17 00:00:00 2001 From: Niklas Weber Date: Sat, 20 Jun 2020 16:27:29 +0200 Subject: [PATCH 2/4] reduce test to only fail for exception test now does not check for actual equality between original and new dataframe anymore, this behavior needs treduce test to only fail for exception test now does not check for actual equality between original and new dataframe anymore, this behavior needs to be addressed in another bug fix be addressed in another bug fix --- pandas/tests/frame/methods/test_replace.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9d7ba21c8339c..1959b73509959 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1418,8 +1418,13 @@ def test_replace_period_ignore_float(self): # buggy behavior is to raise: # TypeError: 'value' should be a 'Period', 'NaT', or array of those. # Got 'float' instead - df_after_replace = df.replace(1.0, 0.0) + # so for now we want to simply call this and no exception should be + # raised, improving upon 1.0.4 behavior + # df_after_replace = df.replace(1.0, 0.0) + df.replace(1.0, 0.0) # 'replace()' should be ignored for these inputs, # so new df should equal old df - tm.assert_frame_equal(df, df_after_replace) + # this is currently still buggy, Period column becomes Object instead + # so needs fix in future. added this info to GH ticket + # tm.assert_frame_equal(df, df_after_replace) From fd4ed7eef5dd7646037df53c0b78b88271f2415e Mon Sep 17 00:00:00 2001 From: Niklas Weber Date: Sun, 21 Jun 2020 14:19:51 +0200 Subject: [PATCH 3/4] adress change request for PR don't reference old version assert expected result --- pandas/tests/frame/methods/test_replace.py | 25 ++++++---------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 1959b73509959..ff67819e159ec 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1407,24 +1407,13 @@ def test_replace_with_duplicate_columns(self, replacement): def test_replace_period_ignore_float(self): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df - with a Period column the old, faulty behavior is to raise TypeError - (reported for Pandas 1.0.4). The intended behavior is to just ignore - that column. This has been fixed in newer versions. This is a regression - test that triggers for the old, broken behavior, i.e. - TypeError is raised + with a Period column the old, faulty behavior is to raise TypeError. """ df = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + df_after_replace = df.replace(1.0, 0.0) - # buggy behavior is to raise: - # TypeError: 'value' should be a 'Period', 'NaT', or array of those. - # Got 'float' instead - # so for now we want to simply call this and no exception should be - # raised, improving upon 1.0.4 behavior - # df_after_replace = df.replace(1.0, 0.0) - df.replace(1.0, 0.0) - - # 'replace()' should be ignored for these inputs, - # so new df should equal old df - # this is currently still buggy, Period column becomes Object instead - # so needs fix in future. added this info to GH ticket - # tm.assert_frame_equal(df, df_after_replace) + df_expected_result = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + # currently replace() changes dtype from Period to object + df_expected_result["Per"] = df_expected_result["Per"].astype(object) + + tm.assert_frame_equal(df_expected_result, df_after_replace) From df9d4769a51dd71fce07a954ab7b1eb6acee1412 Mon Sep 17 00:00:00 2001 From: Niklas Weber Date: Wed, 24 Jun 2020 08:53:40 +0200 Subject: [PATCH 4/4] test for desired behavior and xfail for now unit test for replace() now checks whether the dataframe returned is equal to how we want the function to behave, i.e. no changes to either data values or data type. this behavior needs to be implemented in the future, thux xfail-ing for now. --- pandas/tests/frame/methods/test_replace.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index ff67819e159ec..49cc892aa00d7 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1404,16 +1404,15 @@ def test_replace_with_duplicate_columns(self, replacement): tm.assert_frame_equal(result, expected) + @pytest.mark.xfail( + reason="replace() changes dtype from period to object, see GH34871", strict=True + ) def test_replace_period_ignore_float(self): """ Regression test for GH#34871: if df.replace(1.0, 0.0) is called on a df with a Period column the old, faulty behavior is to raise TypeError. """ df = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) - df_after_replace = df.replace(1.0, 0.0) - - df_expected_result = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) - # currently replace() changes dtype from Period to object - df_expected_result["Per"] = df_expected_result["Per"].astype(object) - - tm.assert_frame_equal(df_expected_result, df_after_replace) + result = df.replace(1.0, 0.0) + expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) + tm.assert_frame_equal(expected, result)