diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 9bf170d6eb9e4..224604a8ff8d0 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -246,6 +246,10 @@ Copy-on-Write improvements
   a modification to the data happens) when constructing a Series from an existing
   Series with the default of ``copy=False`` (:issue:`50471`)
 
+- The :class:`DataFrame` constructor will now create a lazy copy (deferring the copy until
+  a modification to the data happens) when constructing from an existing
+  :class:`DataFrame` with the default of ``copy=False`` (:issue:`51239`)
+
 - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
   of Series objects and specifying ``copy=False``, will now use a lazy copy
   of those Series objects for the columns of the DataFrame (:issue:`50777`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 49416cc2d53c0..d80c80fa5d0ab 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -656,6 +656,8 @@ def __init__(
                 data = data.copy(deep=False)
 
         if isinstance(data, (BlockManager, ArrayManager)):
+            if using_copy_on_write():
+                data = data.copy(deep=False)
             # first check if a Manager is passed without any other arguments
             # -> use fastpath (without checking Manager type)
             if index is None and columns is None and dtype is None and not copy:
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index 2cacf0d6f6f91..6cf45c194707e 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -82,6 +82,24 @@ def test_series_from_series_with_reindex(using_copy_on_write):
         assert not result._mgr.blocks[0].refs.has_reference()
 
 
+@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
+def test_dataframe_constructor_mgr_or_df(using_copy_on_write, func):
+    df = DataFrame({"a": [1, 2, 3]})
+    df_orig = df.copy()
+
+    new_df = DataFrame(func(df))
+
+    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+    new_df.iloc[0] = 100
+
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+        tm.assert_frame_equal(df, new_df)
+
+
 @pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
 @pytest.mark.parametrize("index", [None, [0, 1, 2]])
 @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])