From 5cc32057b672e89cbbeef9399dea077b64ae2e6b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Aug 2022 10:35:37 -0700 Subject: [PATCH 1/8] ENH: copy keyword to set_axis --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/frame.py | 33 +++++++++++--- pandas/core/generic.py | 50 +++++++++++++++++---- pandas/core/series.py | 31 ++++++++++--- pandas/tests/frame/methods/test_set_axis.py | 31 +++++++++++++ 5 files changed, 128 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6e38024e02f36..55c31fe1c77f2 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,6 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0b7349f74f13..2784da605e339 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5010,17 +5010,34 @@ def align( @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: Literal[False] = ... + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame: ... @overload - def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None: + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool | lib.NoDefault = ..., + ) -> None: ... @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: bool = ... + self, + labels, + *, + axis: Axis = ..., + inplace: bool = ..., + copy: bool | lib.NoDefault = ..., ) -> DataFrame | None: ... @@ -5064,8 +5081,14 @@ def set_axis( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): - return super().set_axis(labels, axis=axis, inplace=inplace) + def set_axis( + self, + labels, + axis: Axis = 0, + inplace: bool = False, + copy: bool | lib.NoDefault = lib.no_default, + ): + return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.reindex.__doc__) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7352ad2a4985d..d8393ea9f435c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -708,23 +708,43 @@ def size(self) -> int: @overload def set_axis( - self: NDFrameT, labels, *, axis: Axis = ..., inplace: Literal[False] = ... + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] = ..., + copy: bool_t | lib.NoDefault = ..., ) -> NDFrameT: ... @overload - def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None: + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool_t | lib.NoDefault = ..., + ) -> None: ... @overload def set_axis( - self: NDFrameT, labels, *, axis: Axis = ..., inplace: bool_t = ... + self: NDFrameT, + labels, + *, + axis: Axis = ..., + inplace: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: ... @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) def set_axis( - self: NDFrameT, labels, axis: Axis = 0, inplace: bool_t = False + self: NDFrameT, + labels, + axis: Axis = 0, + inplace: bool_t = False, + copy: bool_t | lib.NoDefault = lib.no_default, ) -> NDFrameT | None: """ Assign desired index to given axis. @@ -744,6 +764,11 @@ def set_axis( inplace : bool, default False Whether to return a new %(klass)s instance. + copy : bool, default True + Whether to make a copy of the underlying data. + + .. versionadded:: 1.5.0 + Returns ------- renamed : %(klass)s or None @@ -753,16 +778,25 @@ def set_axis( -------- %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. """ + if inplace: + if copy is True: + raise ValueError("Cannot specify both inplace=True and copy=True") + copy = False + elif copy is lib.no_default: + copy = True + self._check_inplace_and_allows_duplicate_labels(inplace) - return self._set_axis_nocheck(labels, axis, inplace) + return self._set_axis_nocheck(labels, axis, inplace, copy=copy) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t): + def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): # NDFrame.rename with inplace=False calls set_axis(inplace=True) on a copy. if inplace: setattr(self, self._get_axis_name(axis), labels) else: - obj = self.copy() + # With copy=False, we create a new object but don't copy the + # underlying data. + obj = self.copy(deep=copy) obj.set_axis(labels, axis=axis, inplace=True) return obj @@ -1050,7 +1084,7 @@ def _rename( raise KeyError(f"{missing_labels} not found in axis") new_index = ax._transform_index(f, level=level) - result._set_axis_nocheck(new_index, axis=axis_no, inplace=True) + result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) result._clear_item_cache() if inplace: diff --git a/pandas/core/series.py b/pandas/core/series.py index 765bf9f7e04f1..172f90c52ba99 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4926,17 +4926,34 @@ def rename( @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: Literal[False] = ... + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[False] = ..., + copy: bool | lib.NoDefault = ..., ) -> Series: ... @overload - def set_axis(self, labels, *, axis: Axis = ..., inplace: Literal[True]) -> None: + def set_axis( + self, + labels, + *, + axis: Axis = ..., + inplace: Literal[True], + copy: bool | lib.NoDefault = ..., + ) -> None: ... @overload def set_axis( - self, labels, *, axis: Axis = ..., inplace: bool = ... + self, + labels, + *, + axis: Axis = ..., + inplace: bool = ..., + copy: bool | lib.NoDefault = ..., ) -> Series | None: ... @@ -4968,9 +4985,13 @@ def set_axis( ) @Appender(NDFrame.set_axis.__doc__) def set_axis( # type: ignore[override] - self, labels, axis: Axis = 0, inplace: bool = False + self, + labels, + axis: Axis = 0, + inplace: bool = False, + copy: bool | lib.NoDefault = lib.no_default, ) -> Series | None: - return super().set_axis(labels, axis=axis, inplace=inplace) + return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) # error: Cannot determine type of 'reindex' @doc( diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 3284243ddac48..2bf612895aa1e 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -24,6 +24,37 @@ def test_set_axis(self, obj): result = obj.set_axis(new_index, axis=0, inplace=False) tm.assert_equal(expected, result) + def test_set_axis_copy(self, obj): + # Test copy keyword + new_index = list("abcd")[: len(obj)] + + expected = obj.copy() + expected.index = new_index + + with pytest.raises( + ValueError, match="Cannot specify both inplace=True and copy=True" + ): + obj.set_axis(new_index, axis=0, inplace=True, copy=True) + + result = obj.set_axis(new_index, axis=0, copy=True) + tm.assert_equal(expected, result) + assert result is not obj + # check we DID make a copy + assert not tm.shares_memory(result, obj) + + result = obj.set_axis(new_index, axis=0, copy=False) + tm.assert_equal(expected, result) + assert result is not obj + # check we did NOT make a copy + assert tm.shares_memory(result, obj) + + # copy defaults to True + result = obj.set_axis(new_index, axis=0) + tm.assert_equal(expected, result) + assert result is not obj + # check we DID make a copy + assert not tm.shares_memory(result, obj) + @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) def test_set_axis_inplace_axis(self, axis, obj): # GH#14636 From b2214920dadeadc7a120c34d0491ea23446f762a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Aug 2022 10:37:28 -0700 Subject: [PATCH 2/8] GH ref --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/frame/methods/test_set_axis.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 55c31fe1c77f2..03007355e68e4 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,7 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`??`) +- Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 2bf612895aa1e..556a5ab13ce5b 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -25,7 +25,7 @@ def test_set_axis(self, obj): tm.assert_equal(expected, result) def test_set_axis_copy(self, obj): - # Test copy keyword + # Test copy keyword GH#47932 new_index = list("abcd")[: len(obj)] expected = obj.copy() From 526b471f1b0918ff75729eed39c539c5e77a5570 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 2 Aug 2022 12:18:01 -0700 Subject: [PATCH 3/8] fix test --- pandas/core/generic.py | 1 + pandas/tests/frame/methods/test_set_axis.py | 24 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d8393ea9f435c..38432afbc77fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -744,6 +744,7 @@ def set_axis( labels, axis: Axis = 0, inplace: bool_t = False, + *, copy: bool_t | lib.NoDefault = lib.no_default, ) -> NDFrameT | None: """ diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 556a5ab13ce5b..a43f118a19265 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -40,20 +40,38 @@ def test_set_axis_copy(self, obj): tm.assert_equal(expected, result) assert result is not obj # check we DID make a copy - assert not tm.shares_memory(result, obj) + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) assert result is not obj # check we did NOT make a copy - assert tm.shares_memory(result, obj) + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) # copy defaults to True result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj # check we DID make a copy - assert not tm.shares_memory(result, obj) + if obj.ndim == 1: + assert not tm.shares_memory(result, obj) + else: + assert not any( + tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) + for i in range(obj.shape[1]) + ) @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) def test_set_axis_inplace_axis(self, axis, obj): From 86173b5caa3c1be44793d475dd98581921f24db4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 Aug 2022 18:21:32 -0700 Subject: [PATCH 4/8] fix test --- pandas/tests/frame/methods/test_set_axis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index a43f118a19265..6639a6804ec64 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -53,9 +53,9 @@ def test_set_axis_copy(self, obj): assert result is not obj # check we did NOT make a copy if obj.ndim == 1: - assert not tm.shares_memory(result, obj) + assert tm.shares_memory(result, obj) else: - assert not any( + assert all( tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) From 360f0b7945bbfcb1cf22f8d1f8455d325dbc5e59 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 Aug 2022 18:27:27 -0700 Subject: [PATCH 5/8] troubleshoot mypy --- pandas/core/frame.py | 1 + pandas/core/generic.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2784da605e339..2265fcb263bb3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5086,6 +5086,7 @@ def set_axis( labels, axis: Axis = 0, inplace: bool = False, + *, copy: bool | lib.NoDefault = lib.no_default, ): return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 38432afbc77fe..0a8a9108c8604 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -735,6 +735,7 @@ def set_axis( *, axis: Axis = ..., inplace: bool_t | lib.NoDefault = ..., + copy: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: ... From a9610340b253481f821f513b2e798ce020e81368 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 4 Aug 2022 09:46:18 -0700 Subject: [PATCH 6/8] mypy fixup --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0a8a9108c8604..e80b4a45bb774 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -734,7 +734,7 @@ def set_axis( labels, *, axis: Axis = ..., - inplace: bool_t | lib.NoDefault = ..., + inplace: bool_t = ..., copy: bool_t | lib.NoDefault = ..., ) -> NDFrameT | None: ... From 135527b615768ec9840884a336b3c951c8b3cd70 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 Aug 2022 15:39:35 -0700 Subject: [PATCH 7/8] test inplace=True/copy=False case --- pandas/tests/frame/methods/test_set_axis.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 6639a6804ec64..67488dff3c335 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -28,6 +28,7 @@ def test_set_axis_copy(self, obj): # Test copy keyword GH#47932 new_index = list("abcd")[: len(obj)] + orig = obj.iloc[:] expected = obj.copy() expected.index = new_index @@ -73,6 +74,19 @@ def test_set_axis_copy(self, obj): for i in range(obj.shape[1]) ) + # Do this last since it alters obj inplace + res = obj.set_axis(new_index, inplace=True, copy=False) + assert res is None + tm.assert_equal(expected, obj) + # check we did NOT make a copy + if obj.ndim == 1: + assert tm.shares_memory(obj, orig) + else: + assert all( + tm.shares_memory(obj.iloc[:, i], orig.iloc[:, i]) + for i in range(obj.shape[1]) + ) + @pytest.mark.parametrize("axis", [0, "index", 1, "columns"]) def test_set_axis_inplace_axis(self, axis, obj): # GH#14636 From a9bd8a6c6c7b4ccb7e58f3b392964e6a9fd5a679 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 16 Aug 2022 14:45:13 -0700 Subject: [PATCH 8/8] mypy fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 60b0a3bad9d26..9adcfddc4006c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5108,7 +5108,7 @@ def set_axis( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) - def set_axis( # type: ignore[override] + def set_axis( self, labels, axis: Axis = 0,