From 0c7f0fd3f2c7dbaad433d4f2d8bb6f60a16a2632 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 May 2021 14:37:16 -0700 Subject: [PATCH] TST: xfail incorrect test_empty_groupby --- pandas/tests/groupby/test_groupby.py | 69 +++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index abfa2a23a4402..f716a3a44cd54 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1724,27 +1724,82 @@ def test_pivot_table_values_key_error(): [0], [0.0], ["a"], - [Categorical([0])], + Categorical([0]), [to_datetime(0)], - [date_range(0, 1, 1, tz="US/Eastern")], - [pd.array([0], dtype="Int64")], - [pd.array([0], dtype="Float64")], - [pd.array([False], dtype="boolean")], + date_range(0, 1, 1, tz="US/Eastern"), + pd.array([0], dtype="Int64"), + pd.array([0], dtype="Float64"), + pd.array([False], dtype="boolean"), ], ) @pytest.mark.parametrize("method", ["attr", "agg", "apply"]) @pytest.mark.parametrize( "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] ) -def test_empty_groupby(columns, keys, values, method, op): +def test_empty_groupby(columns, keys, values, method, op, request): # GH8093 & GH26411 + if isinstance(values, Categorical) and len(keys) == 1 and method == "apply": + mark = pytest.mark.xfail(raises=TypeError, match="'str' object is not callable") + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and op in ["idxmax", "idxmin"] + ): + mark = pytest.mark.xfail( + raises=ValueError, match="attempt to get arg(min|max) of an empty sequence" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and not isinstance(columns, list) + ): + mark = pytest.mark.xfail( + raises=TypeError, match="'Categorical' does not implement" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 1 + and op in ["mad", "min", "max", "sum", "prod", "skew"] + ): + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, Categorical) + and len(keys) == 2 + and op in ["min", "max", "sum"] + and method != "apply" + ): + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + elif ( + isinstance(values, pd.core.arrays.BooleanArray) + and op in ["sum", "prod"] + and method != "apply" + ): + mark = pytest.mark.xfail( + raises=AssertionError, match="(DataFrame|Series) are different" + ) + request.node.add_marker(mark) + override_dtype = None if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply": # sum/product of bools is an integer override_dtype = "int64" - df = DataFrame([3 * values], columns=list("ABC")) + df = DataFrame({"A": values, "B": values, "C": values}, columns=list("ABC")) + + if hasattr(values, "dtype"): + # check that we did the construction right + assert (df.dtypes == values.dtype).all() + df = df.iloc[:0] gb = df.groupby(keys)[columns]