From efb61c56b49e57e058d00afb648f283841d863a4 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Mon, 21 Feb 2022 16:44:24 +0100 Subject: [PATCH 1/6] add missing __finalize__ calls in indexers/iterators --- pandas/core/frame.py | 4 +-- pandas/core/generic.py | 2 +- pandas/tests/generic/test_finalize.py | 37 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 082a5814c2fc7..d16d389d489bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1332,7 +1332,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: columns = self.columns klass = self._constructor_sliced for k, v in zip(self.index, self.values): - s = klass(v, index=columns, name=k) + s = klass(v, index=columns, name=k).__finalize__(self) yield k, s def itertuples( @@ -3444,7 +3444,7 @@ def _ixs(self, i: int, axis: int = 0): index=self.columns, name=self.index[i], dtype=new_values.dtype, - ) + ).__finalize__(self) result._set_is_copy(self, copy=copy) return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f9dffaaa399f..2360661cab474 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3894,7 +3894,7 @@ class animal locomotion index=self.columns, name=self.index[loc], dtype=new_values.dtype, - ) + ).__finalize__(self) elif is_scalar(loc): result = self.iloc[:, slice(loc, loc + 1)] elif axis == 1: diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index cf92cd55a720e..1d0641c11dbfb 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -304,6 +304,7 @@ (pd.Series, [1], round), (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])), (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")), + (pd.DataFrame, frame_data, operator.methodcaller("xs", 0)), (pd.Series, (1, mi), operator.methodcaller("xs", "a")), (pd.DataFrame, frame_data, operator.methodcaller("get", "A")), ( @@ -768,3 +769,39 @@ def test_finalize_frame_series_name(): df = pd.DataFrame({"name": [1, 2]}) result = pd.Series([1, 2]).__finalize__(df) assert result.name is None + + +@pytest.mark.parametrize( + "locindexer", + [ + lambda x: x.iloc[0], # returns Series + lambda x: x.iloc[:-1], + lambda x: x.iloc[[0, 1]], + lambda x: x.iloc[[True, True, False]], + lambda x: x.loc["idxA"], # returns Series + lambda x: x.loc[["idxA", "idxB"]], + lambda x: x.loc["idxA":"idxB"], + lambda x: x.loc[[True, True, False]], + lambda x: x.loc[ + pd.Series( + data=[True, True, False, True], index=["idxA", "idxB", "idxC", "idxD"] + ) + ], + lambda x: x.loc[pd.Index(data=["idxA", "idxC"])], + ], +) +def test_finalize_locators(locindexer): + df = pd.DataFrame( + {"A": [1, 2, 3], "B": [3, 4, 5], "C": [7, 8, 9]}, index=["idxA", "idxB", "idxC"] + ) + df.attrs["A"] = 1 + result = locindexer(df) + assert result.attrs == {"A": 1} + + +@pytest.mark.parametrize("iterator", ["iterrows", "items"]) +def test_finalize_iterators(iterator): + df = pd.DataFrame({"A": [1]}) + df.attrs["A"] = 1 + for _, row in getattr(df, iterator)(): + assert row.attrs == {"A": 1} From 5c3350cb10f23d11e88433cc43e166539695db12 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Tue, 22 Feb 2022 09:37:18 +0100 Subject: [PATCH 2/6] fix tests (un-xfail) --- pandas/tests/generic/test_duplicate_labels.py | 9 ++------- pandas/tests/generic/test_finalize.py | 8 +++++--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 189c5382ef114..c83c8e1d568e6 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -84,7 +84,6 @@ def test_binops(self, func, other, frame): assert df.flags.allows_duplicate_labels is False assert func(df).flags.allows_duplicate_labels is False - @not_implemented def test_preserve_getitem(self): df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) assert df[["A"]].flags.allows_duplicate_labels is False @@ -306,15 +305,11 @@ def test_series_raises(self): (operator.itemgetter(["A", "A"]), None), # loc (operator.itemgetter(["a", "a"]), "loc"), - pytest.param( - operator.itemgetter(("a", ["A", "A"])), "loc", marks=not_implemented - ), + pytest.param(operator.itemgetter(("a", ["A", "A"])), "loc"), (operator.itemgetter((["a", "a"], "A")), "loc"), # iloc (operator.itemgetter([0, 0]), "iloc"), - pytest.param( - operator.itemgetter((0, [0, 0])), "iloc", marks=not_implemented - ), + pytest.param(operator.itemgetter((0, [0, 0])), "iloc"), pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"), ], ) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 1d0641c11dbfb..05471b6f5a301 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -249,14 +249,16 @@ ), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("quantile")), - marks=not_implemented_mark, ), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])), ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile")), - marks=not_implemented_mark, + ( + pd.DataFrame, + ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},), + operator.methodcaller("quantile"), + ), ), ( pd.DataFrame, From 1236fc27b1f28d959daf9b05b7dc6983fef418c9 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Tue, 22 Feb 2022 10:43:45 +0100 Subject: [PATCH 3/6] fix typing validation error --- pandas/tests/generic/test_finalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 05471b6f5a301..c21f3e0e63526 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -782,7 +782,7 @@ def test_finalize_frame_series_name(): lambda x: x.iloc[[True, True, False]], lambda x: x.loc["idxA"], # returns Series lambda x: x.loc[["idxA", "idxB"]], - lambda x: x.loc["idxA":"idxB"], + lambda x: x.loc[slice("idxA", "idxB")], lambda x: x.loc[[True, True, False]], lambda x: x.loc[ pd.Series( From 010fe772df02f436223f3c72dd0131a4b823cc22 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Sat, 26 Feb 2022 20:32:45 +0100 Subject: [PATCH 4/6] add release note --- doc/source/whatsnew/v1.4.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index 68999b9bba50f..d95e38468d760 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed metadata propagation in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` (:issue:`28283`) - .. --------------------------------------------------------------------------- From f75a5dd8ef4e3afc6cfa03a27f3b61aa7940b129 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Mon, 4 Apr 2022 17:10:19 +0200 Subject: [PATCH 5/6] fix test failures --- pandas/tests/generic/test_finalize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 7f7a2184b24d5..fec2ba6279f4d 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -265,7 +265,7 @@ ( pd.DataFrame, ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},), - operator.methodcaller("quantile"), + operator.methodcaller("quantile", numeric_only=False), ), ), ( @@ -746,7 +746,7 @@ def test_categorical_accessor(method): ) def test_groupby_finalize(obj, method): obj.attrs = {"a": 1} - result = method(obj.groupby([0, 0])) + result = method(obj.groupby([0, 0], group_keys=False)) assert result.attrs == {"a": 1} From d4372aa588fe6a2913e5e32ae0d7aa75acd90243 Mon Sep 17 00:00:00 2001 From: theOehrly Date: Wed, 13 Apr 2022 00:07:49 +0200 Subject: [PATCH 6/6] move release note to 1.5.0 --- doc/source/whatsnew/v1.4.3.rst | 2 +- doc/source/whatsnew/v1.5.0.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 1ce1e055e1f31..d53acc698c3bb 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Fixed metadata propagation in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` (:issue:`28283`) +- - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 56b1a6317472b..13c0e59df0a02 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -531,6 +531,7 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) - Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) +- Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) - Missing