From 7fc7c57937a5f8bc8041a9fb02abf5aa9b1e39f5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Mar 2023 19:01:42 -0700 Subject: [PATCH 1/5] DEPR: flags --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/_testing/asserters.py | 8 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 33 ++- pandas/tests/copy_view/test_methods.py | 22 +- .../tests/frame/methods/test_reset_index.py | 8 +- pandas/tests/frame/test_api.py | 21 +- pandas/tests/generic/test_duplicate_labels.py | 257 ++++++++++-------- pandas/tests/generic/test_generic.py | 7 +- pandas/tests/test_flags.py | 27 +- pandas/tests/util/test_assert_frame_equal.py | 11 +- pandas/tests/util/test_assert_series_equal.py | 4 +- 12 files changed, 244 insertions(+), 157 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 243ba3b8df119..cc2918067a879 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -113,6 +113,7 @@ Deprecations - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) +- Deprecated :meth:`DataFrame.flags`, :meth:`DataFrame.set_flags`, :meth:`Series.flags`, :meth:`Series.set_flags` 
(:issue:`51280`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index e25e8388bc4cd..785d27ec61004 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -871,7 +871,9 @@ def assert_series_equal( raise_assert_detail(obj, "Series length are different", msg1, msg2) if check_flags: - assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + assert ( + left._flags == right._flags + ), f"{repr(left._flags)} != {repr(right._flags)}" if check_index: # GH #38183 @@ -1144,7 +1146,9 @@ def assert_frame_equal( ) if check_flags: - assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" + assert ( + left._flags == right._flags + ), f"{repr(left._flags)} != {repr(right._flags)}" # index comparison assert_index_equal( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0aadef410b8f8..ac680e952d037 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4810,7 +4810,7 @@ def insert( """ if allow_duplicates is lib.no_default: allow_duplicates = False - if allow_duplicates and not self.flags.allows_duplicate_labels: + if allow_duplicates and not self._flags.allows_duplicate_labels: raise ValueError( "Cannot specify 'allow_duplicates=True' when " "'self.flags.allows_duplicate_labels' is False." 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d5a316c7336da..fddff459e8827 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -396,6 +396,12 @@ def flags(self) -> Flags: False >>> df.flags["allows_duplicate_labels"] = True """ + warnings.warn( + f"{type(self).__name__}.flags is deprecated and will be removed " + "in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) return self._flags @final @@ -446,6 +452,13 @@ def set_flags( >>> df2.flags.allows_duplicate_labels False """ + warnings.warn( + f"{type(self).__name__}.set_flags is deprecated and will be removed " + "in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) + df = self.copy(deep=copy and not using_copy_on_write()) if allows_duplicate_labels is not None: df.flags["allows_duplicate_labels"] = allows_duplicate_labels @@ -2019,7 +2032,7 @@ def __getstate__(self) -> dict[str, Any]: "_typ": self._typ, "_metadata": self._metadata, "attrs": self.attrs, - "_flags": {k: self.flags[k] for k in self.flags._keys}, + "_flags": {k: self._flags[k] for k in self._flags._keys}, **meta, } @@ -4298,7 +4311,7 @@ def __delitem__(self, key) -> None: @final def _check_inplace_and_allows_duplicate_labels(self, inplace): - if inplace and not self.flags.allows_duplicate_labels: + if inplace and not self._flags.allows_duplicate_labels: raise ValueError( "Cannot specify 'inplace=True' when " "'self.flags.allows_duplicate_labels' is False." @@ -6000,26 +6013,26 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self: stable across pandas releases. """ if isinstance(other, NDFrame): - for name in other.attrs: - self.attrs[name] = other.attrs[name] + for name in other._attrs: + self._attrs[name] = other._attrs[name] - self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels + self._flags.allows_duplicate_labels = other._flags.allows_duplicate_labels # For subclasses using _metadata. 
for name in set(self._metadata) & set(other._metadata): assert isinstance(name, str) object.__setattr__(self, name, getattr(other, name, None)) if method == "concat": - attrs = other.objs[0].attrs - check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) + attrs = other.objs[0]._attrs + check_attrs = all(objs._attrs == attrs for objs in other.objs[1:]) if check_attrs: for name in attrs: - self.attrs[name] = attrs[name] + self._attrs[name] = attrs[name] allows_duplicate_labels = all( - x.flags.allows_duplicate_labels for x in other.objs + x._flags.allows_duplicate_labels for x in other.objs ) - self.flags.allows_duplicate_labels = allows_duplicate_labels + self._flags.allows_duplicate_labels = allows_duplicate_labels return self diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 1d8f1dea7d478..91c738d416068 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -104,8 +104,12 @@ def test_copy_shallow(using_copy_on_write): def test_methods_copy_keyword( request, method, copy, using_copy_on_write, using_array_manager ): + warn = None + msg = "(DataFrame|Series).set_flags is deprecated" index = None - if "to_timestamp" in request.node.callspec.id: + if "set_flags" in request.node.callspec.id: + warn = FutureWarning + elif "to_timestamp" in request.node.callspec.id: index = period_range("2012-01-01", freq="D", periods=3) elif "to_period" in request.node.callspec.id: index = date_range("2012-01-01", freq="D", periods=3) @@ -115,7 +119,8 @@ def test_methods_copy_keyword( index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels") df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index) - df2 = method(df, copy=copy) + with tm.assert_produces_warning(warn, match=msg): + df2 = method(df, copy=copy) share_memory = using_copy_on_write or copy is False @@ -171,8 +176,12 @@ def test_methods_copy_keyword( ], ) def 
test_methods_series_copy_keyword(request, method, copy, using_copy_on_write): + warn = None + msg = "(DataFrame|Series).set_flags is deprecated" index = None - if "to_timestamp" in request.node.callspec.id: + if "set_flags" in request.node.callspec.id: + warn = FutureWarning + elif "to_timestamp" in request.node.callspec.id: index = period_range("2012-01-01", freq="D", periods=3) elif "to_period" in request.node.callspec.id: index = date_range("2012-01-01", freq="D", periods=3) @@ -184,7 +193,8 @@ def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write) index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) ser = Series([1, 2, 3], index=index) - ser2 = method(ser, copy=copy) + with tm.assert_produces_warning(warn, match=msg): + ser2 = method(ser, copy=copy) share_memory = using_copy_on_write or copy is False @@ -1203,7 +1213,9 @@ def test_series_set_axis(using_copy_on_write): def test_set_flags(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() - ser2 = ser.set_flags(allows_duplicate_labels=False) + msg = "Series.set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser2 = ser.set_flags(allows_duplicate_labels=False) assert np.shares_memory(ser, ser2) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 8a11a59cdcb58..02e3f1cac8ac9 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -439,7 +439,9 @@ def test_reset_index_duplicate_columns_allow( ): # GH#44755 reset_index with duplicate column labels df = multiindex_df.rename_axis("A") - df = df.set_flags(allows_duplicate_labels=flag) + msg = "DataFrame.set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = df.set_flags(allows_duplicate_labels=flag) if flag and allow_duplicates: result = df.reset_index(allow_duplicates=allow_duplicates) @@ -462,7 +464,9 @@ def 
test_reset_index_duplicate_columns_allow( @pytest.mark.parametrize("flag", [False, True]) def test_reset_index_duplicate_columns_default(self, multiindex_df, flag): df = multiindex_df.rename_axis("A") - df = df.set_flags(allows_duplicate_labels=flag) + msg = "DataFrame.set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = df.set_flags(allows_duplicate_labels=flag) msg = r"cannot insert \('A', ''\), already exists" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index e0d9d6c281fd5..4459f4afc469b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -328,19 +328,25 @@ def test_set_flags( obj = obj["A"] key = 0 - result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) + set_msg = "(DataFrame|Series).set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=set_msg): + result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) + get_msg = "(DataFrame|Series).flags is deprecated" if allows_duplicate_labels is None: # We don't update when it's not provided - assert result.flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is True else: - assert result.flags.allows_duplicate_labels is allows_duplicate_labels + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is allows_duplicate_labels # We made a copy assert obj is not result # We didn't mutate obj - assert obj.flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert obj.flags.allows_duplicate_labels is True # But we didn't copy data if frame_or_series is Series: @@ -357,9 +363,10 @@ def test_set_flags( result.iloc[key] = 1 # Now we do copy. 
- result = obj.set_flags( - copy=True, allows_duplicate_labels=allows_duplicate_labels - ) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + result = obj.set_flags( + copy=True, allows_duplicate_labels=allows_duplicate_labels + ) result.iloc[key] = 10 assert obj.iloc[key] == 1 diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 06170d2241f01..f200c2112fb02 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -9,6 +9,9 @@ not_implemented = pytest.mark.xfail(reason="Not implemented.") +get_msg = "(DataFrame|Series).flags is deprecated" +set_msg = "(DataFrame|Series).set_flags is deprecated" + # ---------------------------------------------------------------------------- # Preservation @@ -25,10 +28,13 @@ class TestPreserves: ) def test_construction_ok(self, cls, data): result = cls(data) - assert result.flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is True - result = cls(data).set_flags(allows_duplicate_labels=False) - assert result.flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=set_msg): + result = cls(data).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is False @pytest.mark.parametrize( "func", @@ -42,8 +48,12 @@ def test_construction_ok(self, cls, data): ], ) def test_preserved_series(self, func): - s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) - assert func(s).flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=set_msg): + s = pd.Series([0, 1], index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert 
func(s).flags.allows_duplicate_labels is False @pytest.mark.parametrize( "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])] @@ -51,54 +61,68 @@ def test_preserved_series(self, func): # TODO: frame @not_implemented def test_align(self, other): - s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + s = pd.Series([0, 1], index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) a, b = s.align(other) - assert a.flags.allows_duplicate_labels is False - assert b.flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert a.flags.allows_duplicate_labels is False + assert b.flags.allows_duplicate_labels is False def test_preserved_frame(self): - df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ) - assert df.loc[["a"]].flags.allows_duplicate_labels is False - assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert df.loc[["a"]].flags.allows_duplicate_labels is False + assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False def test_to_frame(self): - ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False) - assert ser.to_frame().flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=set_msg): + ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert ser.to_frame().flags.allows_duplicate_labels is False @pytest.mark.parametrize("func", ["add", "sub"]) @pytest.mark.parametrize("frame", [False, True]) 
@pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")]) def test_binops(self, func, other, frame): - df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) if frame: df = df.to_frame() if isinstance(other, pd.Series) and frame: other = other.to_frame() func = operator.methodcaller(func, other) - assert df.flags.allows_duplicate_labels is False - assert func(df).flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert df.flags.allows_duplicate_labels is False + assert func(df).flags.allows_duplicate_labels is False def test_preserve_getitem(self): - df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) - assert df[["A"]].flags.allows_duplicate_labels is False - assert df["A"].flags.allows_duplicate_labels is False - assert df.loc[0].flags.allows_duplicate_labels is False - assert df.loc[[0]].flags.allows_duplicate_labels is False - assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert df[["A"]].flags.allows_duplicate_labels is False + assert df["A"].flags.allows_duplicate_labels is False + assert df.loc[0].flags.allows_duplicate_labels is False + assert df.loc[[0]].flags.allows_duplicate_labels is False + assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write): if not using_copy_on_write: request.node.add_marker(pytest.mark.xfail(reason="Unclear behavior.")) # NDFrame.__getitem__ will cache the first df['A']. May need to # invalidate that cache? Update the cached entries? 
- df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) - assert df["A"].flags.allows_duplicate_labels is False - df.flags.allows_duplicate_labels = True - assert df["A"].flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert df["A"].flags.allows_duplicate_labels is False + df.flags.allows_duplicate_labels = True + assert df["A"].flags.allows_duplicate_labels is True @pytest.mark.parametrize( "objs, kwargs", @@ -106,83 +130,55 @@ def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write): # Series ( [ - pd.Series(1, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.Series(2, index=["c", "d"]).set_flags( - allows_duplicate_labels=False - ), + pd.Series(1, index=["a", "b"]), + pd.Series(2, index=["c", "d"]), ], {}, ), ( [ - pd.Series(1, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.Series(2, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), + pd.Series(1, index=["a", "b"]), + pd.Series(2, index=["a", "b"]), ], {"ignore_index": True}, ), ( [ - pd.Series(1, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.Series(2, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), + pd.Series(1, index=["a", "b"]), + pd.Series(2, index=["a", "b"]), ], {"axis": 1}, ), # Frame ( [ - pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.DataFrame({"A": [1, 2]}, index=["c", "d"]).set_flags( - allows_duplicate_labels=False - ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]), + pd.DataFrame({"A": [1, 2]}, index=["c", "d"]), ], {}, ), ( [ - pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), 
+ pd.DataFrame({"A": [1, 2]}, index=["a", "b"]), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]), ], {"ignore_index": True}, ), ( [ - pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.DataFrame({"B": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]), + pd.DataFrame({"B": [1, 2]}, index=["a", "b"]), ], {"axis": 1}, ), # Series / Frame ( [ - pd.DataFrame({"A": [1, 2]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), + pd.DataFrame({"A": [1, 2]}, index=["a", "b"]), pd.Series( [1, 2], index=["a", "b"], name="B", - ).set_flags( - allows_duplicate_labels=False, ), ], {"axis": 1}, @@ -190,30 +186,29 @@ def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write): ], ) def test_concat(self, objs, kwargs): + with tm.assert_produces_warning(FutureWarning, match=set_msg): + objs = [x.set_flags(allows_duplicate_labels=False) for x in objs] result = pd.concat(objs, **kwargs) - assert result.flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is False @pytest.mark.parametrize( - "left, right, kwargs, expected", + "left, right, should_set, kwargs, expected", [ # false false false pytest.param( - pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), - pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags( - allows_duplicate_labels=False - ), + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), + pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + (True, True), {"left_index": True, "right_index": True}, False, marks=not_implemented, ), # false true false pytest.param( - pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ), + pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + (True, False), {"left_index": True, 
"right_index": True}, False, marks=not_implemented, @@ -222,14 +217,23 @@ def test_concat(self, objs, kwargs): ( pd.DataFrame({"A": [0, 1]}, index=["a", "b"]), pd.DataFrame({"B": [0, 1]}, index=["a", "d"]), + (False, False), {"left_index": True, "right_index": True}, True, ), ], ) - def test_merge(self, left, right, kwargs, expected): + def test_merge(self, left, right, should_set, kwargs, expected): + should_set_left, should_set_right = should_set + if should_set_left: + with tm.assert_produces_warning(FutureWarning, match=set_msg): + left = left.set_flags(allows_duplicate_labels=False) + if should_set_right: + with tm.assert_produces_warning(FutureWarning, match=set_msg): + right = right.set_flags(allows_duplicate_labels=False) result = pd.merge(left, right, **kwargs) - assert result.flags.allows_duplicate_labels is expected + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is expected @not_implemented def test_groupby(self): @@ -238,9 +242,11 @@ def test_groupby(self): # - apply # - transform # - Should passing a grouper that disallows duplicates propagate? 
- df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False) result = df.groupby([0, 0, 1]).agg("count") - assert result.flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is False @pytest.mark.parametrize("frame", [True, False]) @not_implemented @@ -253,9 +259,10 @@ def test_window(self, frame): ) if frame: df = df.to_frame() - assert df.rolling(3).mean().flags.allows_duplicate_labels is False - assert df.ewm(3).mean().flags.allows_duplicate_labels is False - assert df.expanding(3).mean().flags.allows_duplicate_labels is False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert df.rolling(3).mean().flags.allows_duplicate_labels is False + assert df.ewm(3).mean().flags.allows_duplicate_labels is False + assert df.expanding(3).mean().flags.allows_duplicate_labels is False # ---------------------------------------------------------------------------- @@ -274,11 +281,13 @@ class TestRaises: ) def test_set_flags_with_duplicates(self, cls, axes): result = cls(**axes) - assert result.flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert result.flags.allows_duplicate_labels is True msg = "Index has duplicates." with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - cls(**axes).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + cls(**axes).set_flags(allows_duplicate_labels=False) @pytest.mark.parametrize( "data", @@ -291,13 +300,18 @@ def test_set_flags_with_duplicates(self, cls, axes): def test_setting_allows_duplicate_labels_raises(self, data): msg = "Index has duplicates." 
with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - data.flags.allows_duplicate_labels = False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + data.flags.allows_duplicate_labels = False - assert data.flags.allows_duplicate_labels is True + with tm.assert_produces_warning(FutureWarning, match=get_msg): + assert data.flags.allows_duplicate_labels is True def test_series_raises(self): a = pd.Series(0, index=["a", "b"]) - b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + b = pd.Series([0, 1], index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) msg = "Index has duplicates." with pytest.raises(pd.errors.DuplicateLabelError, match=msg): pd.concat([a, b]) @@ -317,9 +331,10 @@ def test_series_raises(self): ], ) def test_getitem_raises(self, getter, target): - df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( - allows_duplicate_labels=False - ) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags( + allows_duplicate_labels=False + ) if target: # df, df.loc, or df.iloc target = getattr(df, target) @@ -335,27 +350,26 @@ def test_getitem_raises(self, getter, target): [ ( [ - pd.Series(1, index=[0, 1], name="a").set_flags( - allows_duplicate_labels=False - ), - pd.Series(2, index=[0, 1], name="a").set_flags( - allows_duplicate_labels=False - ), + pd.Series(1, index=[0, 1], name="a"), + pd.Series(2, index=[0, 1], name="a"), ], {"axis": 1}, ) ], ) def test_concat_raises(self, objs, kwargs): + with tm.assert_produces_warning(FutureWarning, match=set_msg): + objs = [x.set_flags(allows_duplicate_labels=False) for x in objs] msg = "Index has duplicates." 
with pytest.raises(pd.errors.DuplicateLabelError, match=msg): pd.concat(objs, **kwargs) @not_implemented def test_merge_raises(self): - a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags( - allows_duplicate_labels=False - ) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags( + allows_duplicate_labels=False + ) b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"]) msg = "Index has duplicates." with pytest.raises(pd.errors.DuplicateLabelError, match=msg): @@ -379,14 +393,20 @@ def test_merge_raises(self): ) def test_raises_basic(idx): msg = "Index has duplicates." + with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False) with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + pd.DataFrame({"A": [1, 1]}, index=idx).set_flags( + allows_duplicate_labels=False + ) with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False) def test_format_duplicate_labels_message(): @@ -409,7 +429,8 @@ def test_format_duplicate_labels_message_multi(): def test_dataframe_insert_raises(): - df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False) msg = "Cannot specify" with pytest.raises(ValueError, match=msg): df.insert(0, "A", [3, 4], 
allow_duplicates=True) @@ -424,11 +445,13 @@ def test_dataframe_insert_raises(): ], ) def test_inplace_raises(method, frame_only): - df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags( - allows_duplicate_labels=False - ) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags( + allows_duplicate_labels=False + ) s = df["A"] - s.flags.allows_duplicate_labels = False + with tm.assert_produces_warning(FutureWarning, match=get_msg): + s.flags.allows_duplicate_labels = False msg = "Cannot specify" with pytest.raises(ValueError, match=msg): @@ -439,10 +462,12 @@ def test_inplace_raises(method, frame_only): def test_pickle(): - a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False) b = tm.round_trip_pickle(a) tm.assert_series_equal(a, b) - a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False) b = tm.round_trip_pickle(a) tm.assert_frame_equal(a, b) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 74d8277c975e4..deca2b690a4f2 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -445,6 +445,9 @@ def test_flags_identity(self, frame_or_series): if frame_or_series is DataFrame: obj = obj.to_frame() - assert obj.flags is obj.flags + msg = "(DataFrame|Series).flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.flags is obj.flags obj2 = obj.copy() - assert obj2.flags is not obj.flags + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj2.flags is not obj.flags diff --git a/pandas/tests/test_flags.py b/pandas/tests/test_flags.py index 9294b3fc3319b..d5a7179c0e652 100644 --- 
a/pandas/tests/test_flags.py +++ b/pandas/tests/test_flags.py @@ -1,12 +1,17 @@ import pytest import pandas as pd +import pandas._testing as tm + +set_msg = "DataFrame.set_flags is deprecated" +get_msg = "DataFrame.flags is deprecated" class TestFlags: def test_equality(self): - a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags - b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = pd.DataFrame().set_flags(allows_duplicate_labels=True).flags + b = pd.DataFrame().set_flags(allows_duplicate_labels=False).flags assert a == a assert b == b @@ -14,29 +19,35 @@ def test_equality(self): assert a != 2 def test_set(self): - df = pd.DataFrame().set_flags(allows_duplicate_labels=True) - a = df.flags + with tm.assert_produces_warning(FutureWarning, match=set_msg): + df = pd.DataFrame().set_flags(allows_duplicate_labels=True) + with tm.assert_produces_warning(FutureWarning, match=get_msg): + a = df.flags a.allows_duplicate_labels = False assert a.allows_duplicate_labels is False a["allows_duplicate_labels"] = True assert a.allows_duplicate_labels is True def test_repr(self): - a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=True).flags) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=True).flags) assert a == "" - a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=False).flags) + with tm.assert_produces_warning(FutureWarning, match=set_msg): + a = repr(pd.DataFrame({"A"}).set_flags(allows_duplicate_labels=False).flags) assert a == "" def test_obj_ref(self): df = pd.DataFrame() - flags = df.flags + with tm.assert_produces_warning(FutureWarning, match=get_msg): + flags = df.flags del df with pytest.raises(ValueError, match="object has been deleted"): flags.allows_duplicate_labels = True def test_getitem(self): df = pd.DataFrame() - flags = df.flags + with 
tm.assert_produces_warning(FutureWarning, match=get_msg): + flags = df.flags assert flags["allows_duplicate_labels"] is True flags["allows_duplicate_labels"] = False assert flags["allows_duplicate_labels"] is False diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 2d3b47cd2e994..90099da0828f4 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -252,7 +252,9 @@ def test_assert_frame_equal_datetime_like_dtype_mismatch(dtype): def test_allows_duplicate_labels(): left = DataFrame() - right = DataFrame().set_flags(allows_duplicate_labels=False) + msg = "DataFrame.set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + right = DataFrame().set_flags(allows_duplicate_labels=False) tm.assert_frame_equal(left, left) tm.assert_frame_equal(right, right) tm.assert_frame_equal(left, right, check_flags=False) @@ -298,10 +300,13 @@ def test_assert_frame_equal_check_like_different_indexes(): def test_assert_frame_equal_checking_allow_dups_flag(): # GH#45554 left = DataFrame([[1, 2], [3, 4]]) - left.flags.allows_duplicate_labels = False + msg = "DataFrame.flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + left.flags.allows_duplicate_labels = False right = DataFrame([[1, 2], [3, 4]]) - right.flags.allows_duplicate_labels = True + with tm.assert_produces_warning(FutureWarning, match=msg): + right.flags.allows_duplicate_labels = True tm.assert_frame_equal(left, right, check_flags=False) with pytest.raises(AssertionError, match="allows_duplicate_labels"): diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 835f710842cc0..0ca247110bd2a 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -357,7 +357,9 @@ def test_assert_series_equal_ignore_extension_dtype_mismatch(right_dtype): def 
test_allows_duplicate_labels(): left = Series([1]) - right = Series([1]).set_flags(allows_duplicate_labels=False) + msg = "Series.set_flags is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + right = Series([1]).set_flags(allows_duplicate_labels=False) tm.assert_series_equal(left, left) tm.assert_series_equal(right, right) tm.assert_series_equal(left, right, check_flags=False) From 080e4a514169bcd5eb91f43ba909379f3fc4b38b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 16:28:14 -0700 Subject: [PATCH 2/5] doctests, docbuild --- doc/source/user_guide/duplicates.rst | 6 ++++++ pandas/conftest.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/doc/source/user_guide/duplicates.rst b/doc/source/user_guide/duplicates.rst index 7894789846ce8..3475dd0cea3f8 100644 --- a/doc/source/user_guide/duplicates.rst +++ b/doc/source/user_guide/duplicates.rst @@ -121,6 +121,7 @@ will be raised. .. ipython:: python :okexcept: + :okwarning: pd.Series([0, 1, 2], index=["a", "b", "b"]).set_flags(allows_duplicate_labels=False) @@ -128,6 +129,7 @@ This applies to both row and column labels for a :class:`DataFrame` .. ipython:: python :okexcept: + :okwarning: pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=["A", "B", "C"],).set_flags( allows_duplicate_labels=False @@ -137,6 +139,7 @@ This attribute can be checked or set with :attr:`~DataFrame.flags.allows_duplica which indicates whether that object can have duplicate labels. .. ipython:: python + :okwarning: df = pd.DataFrame({"A": [0, 1, 2, 3]}, index=["x", "y", "X", "Y"]).set_flags( allows_duplicate_labels=False @@ -148,6 +151,7 @@ which indicates whether that object can have duplicate labels. like ``allows_duplicate_labels`` set to some value .. ipython:: python + :okwarning: df2 = df.set_flags(allows_duplicate_labels=True) df2.flags.allows_duplicate_labels @@ -157,6 +161,7 @@ Or the property can just be set directly on the same object .. 
ipython:: python + :okwarning: df2.flags.allows_duplicate_labels = False df2.flags.allows_duplicate_labels @@ -193,6 +198,7 @@ operations. .. ipython:: python :okexcept: + :okwarning: s1 = pd.Series(0, index=["a", "b"]).set_flags(allows_duplicate_labels=False) s1 diff --git a/pandas/conftest.py b/pandas/conftest.py index 70e1c317c2043..5adfd3852ce1b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -142,6 +142,14 @@ def pytest_collection_modifyitems(items, config) -> None: "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", ), + ( + ("Flags", "flags", "allows_duplicate_labels"), + "DataFrame.flags is deprecated and will be removed in a future version", + ), + ( + "set_flags", + "DataFrame.set_flags is deprecated and will be removed in a future version", + ), ] for item in items: From 95186a5ca5d10ddf0d54332cf24682d32a66347e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Apr 2023 19:48:32 -0700 Subject: [PATCH 3/5] mypy fixup --- pandas/conftest.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 5adfd3852ce1b..3d3382eadfe6c 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -143,7 +143,15 @@ def pytest_collection_modifyitems(items, config) -> None: "in future version of pandas", ), ( - ("Flags", "flags", "allows_duplicate_labels"), + "Flags", + "DataFrame.flags is deprecated and will be removed in a future version", + ), + ( + "flags", + "DataFrame.flags is deprecated and will be removed in a future version", + ), + ( + "allows_duplicate_labels", "DataFrame.flags is deprecated and will be removed in a future version", ), ( From 8e02b92ea88ab36d5ec94645bf5c75695527d8f5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Apr 2023 13:04:27 -0700 Subject: [PATCH 4/5] suppress doctest warnings --- pandas/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 
3d3382eadfe6c..cff69471dbcb2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -158,6 +158,14 @@ def pytest_collection_modifyitems(items, config) -> None: "set_flags", "DataFrame.set_flags is deprecated and will be removed in a future version", ), + ( + "DuplicateLabelError", + "Series.set_flags is deprecated and will be removed in a future version", + ), + ( + "DuplicateLabelError", + "Series.flags is deprecated and will be removed in a future version", + ), ] for item in items: From e67982c2dcd9e0f101ca1582bbdc82abd74ac952 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 10:32:06 -0800 Subject: [PATCH 5/5] update inspect.getmembers tests --- pandas/tests/frame/test_api.py | 5 ++++- pandas/tests/series/test_api.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index c6c314e955786..0e536ffa8cf3a 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -394,6 +394,9 @@ def test_inspect_getmembers(self): df = DataFrame() msg = "DataFrame._data is deprecated" with tm.assert_produces_warning( - DeprecationWarning, match=msg, check_stacklevel=False + # FutureWarning is for DataFrame.flags + (FutureWarning, DeprecationWarning), + match=msg, + check_stacklevel=False, ): inspect.getmembers(df) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 29d6e2036476e..60156302a03c5 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -173,7 +173,10 @@ def test_inspect_getmembers(self): ser = Series(dtype=object) msg = "Series._data is deprecated" with tm.assert_produces_warning( - DeprecationWarning, match=msg, check_stacklevel=False + # FutureWarning is for Series.flags + (FutureWarning, DeprecationWarning), + match=msg, + check_stacklevel=False, ): inspect.getmembers(ser)