From 2ec85c2c7ada208accb8bab6191fbbb56130f947 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 14 Jan 2022 07:20:30 -0800 Subject: [PATCH] Backport PR #44889: BUG: ArrayManager not respecting copy keyword --- pandas/core/internals/construction.py | 23 ++++++------------- pandas/tests/frame/indexing/test_setitem.py | 4 ---- pandas/tests/frame/test_constructors.py | 13 ++++------- pandas/tests/frame/test_reductions.py | 12 ++-------- .../tests/indexing/multiindex/test_setitem.py | 8 +++---- 5 files changed, 16 insertions(+), 44 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 532309dfc40b3..4bec30cad22cc 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -35,7 +35,6 @@ ) from pandas.core.dtypes.common import ( is_1d_only_ea_dtype, - is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, @@ -44,7 +43,6 @@ is_named_tuple, is_object_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -475,23 +473,16 @@ def dict_to_mgr( keys = list(data.keys()) columns = Index(keys) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] - # GH#24096 need copy to be deep for datetime64tz case - # TODO: See if we can avoid these copies arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays] - arrays = [ - arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays - ] if copy: - # arrays_to_mgr (via form_blocks) won't make copies for EAs - # dtype attr check to exclude EADtype-castable strs - arrays = [ - x - if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype) - else x.copy() - for x in arrays - ] - # TODO: can we get rid of the dt64tz special case above? + if typ == "block": + # We only need to copy arrays that will not get consolidated, i.e. + # only EA arrays + arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays] + else: + # dtype check to exclude e.g. range objects, scalars + arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cd0a0a0467742..c8a22e48d16c1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -421,10 +421,6 @@ def test_setitem_frame_duplicate_columns(self, using_array_manager, request): # TODO(ArrayManager) .loc still overwrites expected["B"] = expected["B"].astype("int64") - mark = pytest.mark.xfail( - reason="Both 'A' columns get set with 3 instead of 0 and 3" - ) - request.node.add_marker(mark) else: # set these with unique columns to be extra-unambiguous expected[2] = expected[2].astype(np.int64) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7f030fc11a20b..3537f155c08f5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2158,8 +2158,6 @@ def test_constructor_ndarray_copy(self, float_frame, using_array_manager): arr[0, 0] = 1000 assert df.iloc[0, 0] == 1000 - # TODO(ArrayManager) keep view on Series? - @td.skip_array_manager_not_yet_implemented def test_constructor_series_copy(self, float_frame): series = float_frame._series @@ -2508,13 +2506,10 @@ def test_constructor_list_str_na(self, string_dtype): def test_dict_nocopy( self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager ): - if using_array_manager and not ( - (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES)) - or ( - any_numpy_dtype - in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES) - and copy - ) + if ( + using_array_manager + and not copy + and not (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES)) ): # TODO(ArrayManager) properly honor copy keyword for dict input td.mark_array_manager_not_yet_implemented(request) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 245e54d665745..43cf8cccb1784 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -773,11 +773,7 @@ def test_operators_timedelta64(self): def test_std_timedelta64_skipna_false(self): # GH#37392 tdi = pd.timedelta_range("1 Day", periods=10) - df = DataFrame({"A": tdi, "B": tdi}) - # Copy is needed for ArrayManager case, otherwise setting df.iloc - # below edits tdi, alterting both df['A'] and df['B'] - # FIXME: passing copy=True to constructor does not fix this - df = df.copy() + df = DataFrame({"A": tdi, "B": tdi}, copy=True) df.iloc[-2, -1] = pd.NaT result = df.std(skipna=False) @@ -1064,11 +1060,7 @@ def test_idxmax_idxmin_convert_dtypes(self, op, expected_value): def test_idxmax_dt64_multicolumn_axis1(self): dti = date_range("2016-01-01", periods=3) - df = DataFrame({3: dti, 4: dti[::-1]}) - # FIXME: copy needed for ArrayManager, otherwise setting with iloc - # below also sets df.iloc[-1, 1]; passing copy=True to DataFrame - # does not solve this. - df = df.copy() + df = DataFrame({3: dti, 4: dti[::-1]}, copy=True) df.iloc[0, 0] = pd.NaT df._consolidate_inplace() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 47868779c7ef7..a9af83fa632f2 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -214,11 +214,9 @@ def test_multiindex_assignment_single_dtype(self, using_array_manager): exp = Series(arr, index=[8, 10], name="c", dtype="int64") result = df.loc[4, "c"] tm.assert_series_equal(result, exp) - if not using_array_manager: - # FIXME(ArrayManager): this correctly preserves dtype, - # but incorrectly is not inplace. - # extra check for inplace-ness - tm.assert_numpy_array_equal(view, exp.values) + + # extra check for inplace-ness + tm.assert_numpy_array_equal(view, exp.values) # arr + 0.5 cannot be cast losslessly to int, so we upcast df.loc[4, "c"] = arr + 0.5