From dbaebd01a0e8d592bacc1b471583a999bc7d25d5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 Feb 2023 18:07:07 -0800 Subject: [PATCH 1/6] Note stateful tests: --- pandas/tests/base/test_misc.py | 1 + pandas/tests/groupby/test_quantile.py | 1 + pandas/tests/io/test_sql.py | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 4df55aefdcb06..04cf653651329 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -86,6 +86,7 @@ def test_ndarray_compat_properties(index_or_series_obj): @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(index_or_series_obj): + # TODO: Stateful due to the cache obj = index_or_series_obj res = obj.memory_usage() diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 79354e550d3f6..a4917b85a052c 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -314,6 +314,7 @@ def test_groupby_quantile_NA_int(any_int_ea_dtype): def test_groupby_quantile_all_na_group_masked( interpolation, val1, val2, any_numeric_ea_dtype ): + # TODO: Stateful & can raise RuntimeWarning # GH#37493 df = DataFrame( {"a": [1, 1, 1, 2], "b": [1, 2, 3, pd.NA]}, dtype=any_numeric_ea_dtype diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d5c6eccad4783..7139971f5f897 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1807,6 +1807,10 @@ def test_create_table(self): insp = inspect(temp_conn) assert insp.has_table("temp_frame") + # Cleanup + with sql.SQLDatabase(temp_conn, need_transaction=True) as pandasSQL: + pandasSQL.drop_table("temp_frame") + def test_drop_table(self): from sqlalchemy import inspect From b40d765869c54c4489689d383dd99461bb654c45 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 Feb 
2023 19:01:07 -0800 Subject: [PATCH 2/6] Make test_memory_usage less stateful --- pandas/tests/base/test_misc.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 04cf653651329..e753b1bec8d66 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -3,10 +3,7 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - PYPY, -) +from pandas.compat import PYPY from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -86,28 +83,28 @@ def test_ndarray_compat_properties(index_or_series_obj): @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(index_or_series_obj): - # TODO: Stateful due to the cache obj = index_or_series_obj + # Clear index caches so that len(obj) == 0 results are less stateful + if isinstance(obj, Series): + is_ser = True + obj.index._engine.clear_mapping() + else: + is_ser = False + obj._engine.clear_mapping() res = obj.memory_usage() res_deep = obj.memory_usage(deep=True) - is_ser = isinstance(obj, Series) - is_object = is_object_dtype(obj) or ( - isinstance(obj, Series) and is_object_dtype(obj.index) - ) + is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index)) is_categorical = is_categorical_dtype(obj.dtype) or ( - isinstance(obj, Series) and is_categorical_dtype(obj.index.dtype) + is_ser and is_categorical_dtype(obj.index.dtype) ) is_object_string = is_dtype_equal(obj, "string[python]") or ( is_ser and is_dtype_equal(obj.index.dtype, "string[python]") ) if len(obj) == 0: - if isinstance(obj, Index): - expected = 0 - else: - expected = 108 if IS64 else 64 + expected = 0 assert res_deep == res == expected elif is_object or is_categorical or is_object_string: # only deep will pick them up From 7b4a0fe5ac9c5895d8eaaa3d18204069beaf7808 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 23 
Feb 2023 09:12:12 -0800 Subject: [PATCH 3/6] Add some docs about test_binops --- pandas/tests/generic/test_finalize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index c39973d7649e8..447156205c4ea 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -490,8 +490,9 @@ def test_finalize_called_eval_numexpr(): (pd.DataFrame({"A": [1]}), pd.Series([1])), ], ) -def test_binops(request, args, annotate, all_binary_operators): +def test_binops(args, annotate, all_binary_operators): # This generates 624 tests... Is that needed? + # TODO: This test is stateful, .attrs should be reset left, right = args if annotate == "both" and isinstance(left, int) or isinstance(right, int): return From 1692de526c3f75e094aef98d03958688a1c93a45 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 23 Feb 2023 10:45:29 -0800 Subject: [PATCH 4/6] Catch RuntimeWarning with astype --- pandas/core/groupby/groupby.py | 15 +++++++++------ pandas/tests/groupby/test_quantile.py | 1 - 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 39ba102ab3782..3973dc18abd59 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3184,12 +3184,15 @@ def post_processor( # Item "ExtensionDtype" of "Union[ExtensionDtype, str, # dtype[Any], Type[object]]" has no attribute "numpy_dtype" # [union-attr] - return type(orig_vals)( - vals.astype( - inference.numpy_dtype # type: ignore[union-attr] - ), - result_mask, - ) + with warnings.catch_warnings(): + # vals.astype with nan can warn with numpy >1.24 + warnings.filterwarnings("ignore", category=RuntimeWarning) + return type(orig_vals)( + vals.astype( + inference.numpy_dtype # type: ignore[union-attr] + ), + result_mask, + ) elif not ( is_integer_dtype(inference) diff 
--git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index a4917b85a052c..79354e550d3f6 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -314,7 +314,6 @@ def test_groupby_quantile_NA_int(any_int_ea_dtype): def test_groupby_quantile_all_na_group_masked( interpolation, val1, val2, any_numeric_ea_dtype ): - # TODO: Stateful & can raise RuntimeWarning # GH#37493 df = DataFrame( {"a": [1, 1, 1, 2], "b": [1, 2, 3, pd.NA]}, dtype=any_numeric_ea_dtype From 7ddf84c75a854de978d7e3ae6b2792e2663b45fc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 23 Feb 2023 13:19:11 -0800 Subject: [PATCH 5/6] Make test_binops less stateful --- pandas/tests/generic/test_finalize.py | 66 ++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 447156205c4ea..d85de12566fb6 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -489,17 +489,71 @@ def test_finalize_called_eval_numexpr(): (pd.Series([1]), pd.DataFrame({"A": [1]})), (pd.DataFrame({"A": [1]}), pd.Series([1])), ], + ids=lambda x: f"({type(x[0]).__name__},{type(x[1]).__name__})", ) -def test_binops(args, annotate, all_binary_operators): +def test_binops(request, args, annotate, all_binary_operators): # This generates 624 tests... Is that needed? 
- # TODO: This test is stateful, .attrs should be reset left, right = args - if annotate == "both" and isinstance(left, int) or isinstance(right, int): - return - + if isinstance(left, (pd.DataFrame, pd.Series)): + left.attrs = {} + if isinstance(right, (pd.DataFrame, pd.Series)): + right.attrs = {} + + if annotate == "left" and isinstance(left, int): + pytest.skip("left is an int and doesn't support .attrs") + if annotate == "right" and isinstance(right, int): + pytest.skip("right is an int and doesn't support .attrs") + + if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both": + if not all_binary_operators.__name__.startswith("r"): + if annotate == "right" and isinstance(left, type(right)): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when right has " + f"attrs and both are {type(left)}" + ) + ) + if not isinstance(left, type(right)): + if annotate == "left" and isinstance(left, pd.Series): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when the " + "objects are different Series has attrs" + ) + ) + elif annotate == "right" and isinstance(right, pd.Series): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when the " + "objects are different Series has attrs" + ) + ) + else: + if annotate == "left" and isinstance(left, type(right)): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when left has " + f"attrs and both are {type(left)}" + ) + ) + if not isinstance(left, type(right)): + if annotate == "right" and isinstance(right, pd.Series): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when the " + "objects are different Series has attrs" + ) + ) + elif annotate == "left" and isinstance(left, pd.Series): + request.node.add_marker( + pytest.mark.xfail( + reason=f"{all_binary_operators} doesn't work when the " 
+ "objects are different Series has attrs" + ) + ) if annotate in {"left", "both"} and not isinstance(left, int): left.attrs = {"a": 1} - if annotate in {"left", "both"} and not isinstance(right, int): + if annotate in {"right", "both"} and not isinstance(right, int): right.attrs = {"a": 1} is_cmp = all_binary_operators in [ From a003a878417b61ece2fe8cdf81193c64a615a6ee Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 23 Feb 2023 13:28:18 -0800 Subject: [PATCH 6/6] Clarify comment --- pandas/tests/base/test_misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index e753b1bec8d66..01705ca31adcd 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -84,7 +84,7 @@ def test_ndarray_compat_properties(index_or_series_obj): @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") def test_memory_usage(index_or_series_obj): obj = index_or_series_obj - # Clear index caches so that len(obj) == 0 results are less stateful + # Clear index caches so that empty objects (len(obj) == 0) report 0 memory usage if isinstance(obj, Series): is_ser = True obj.index._engine.clear_mapping()