From 5d69654a4ad3ba964ffa6f852aed195c31ed5b1a Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 3 Apr 2023 04:49:26 +0200 Subject: [PATCH] Backport PR #52068 on branch 2.0.x (CoW: Switch to copy=False everywhere for Series constructor) (#52367) Backport PR #52068: CoW: Switch to copy=False everywhere for Series constructor Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com> --- pandas/core/algorithms.py | 6 +++--- pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/categorical.py | 8 ++++++-- pandas/core/arrays/masked.py | 4 ++-- pandas/core/arrays/sparse/accessor.py | 1 + pandas/core/arrays/sparse/array.py | 2 +- pandas/core/arrays/sparse/scipy_sparse.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/accessors.py | 2 +- pandas/core/reshape/encoding.py | 4 ++-- pandas/core/strings/accessor.py | 4 ++-- pandas/core/strings/object_array.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/core/window/ewm.py | 4 ++-- pandas/core/window/rolling.py | 8 ++++---- pandas/io/stata.py | 4 ++-- pandas/plotting/_matplotlib/boxplot.py | 2 +- 19 files changed, 34 insertions(+), 29 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4f153863c8cb8..29009c1627cfb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -837,7 +837,7 @@ def value_counts( if bins is not None: from pandas.core.reshape.tile import cut - values = Series(values) + values = Series(values, copy=False) try: ii = cut(values, bins, include_lowest=True) except TypeError as err: @@ -860,7 +860,7 @@ def value_counts( else: if is_extension_array_dtype(values): # handle Categorical and sparse, - result = Series(values)._values.value_counts(dropna=dropna) + result = Series(values, copy=False)._values.value_counts(dropna=dropna) result.name = name result.index.name = index_name counts = result._values @@ -892,7 +892,7 @@ def value_counts( idx = idx.astype(object) idx.name = index_name - result = Series(counts, index=idx, name=name) + result = Series(counts, index=idx, name=name, copy=False) if sort: result = result.sort_values(ascending=ascending) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 5d1bb04cfacbd..8804582798d12 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -448,7 +448,7 @@ def value_counts(self, dropna: bool = True) -> Series: index_arr = self._from_backing_data(np.asarray(result.index._data)) index = Index(index_arr, name=result.index.name) - return Series(result._values, index=index, name=result.name) + return Series(result._values, index=index, name=result.name, copy=False) def _quantile( self: NDArrayBackedExtensionArrayT, diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 8e6b9db0fe7d4..a0245be8e14c1 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1104,7 +1104,7 @@ def value_counts(self, dropna: bool = True) -> Series: index = Index(type(self)(values)) - return Series(counts, index=index, name="count") + return Series(counts, index=index, name="count", copy=False) @classmethod def _concat_same_type( diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 02ee2eb4a80ce..ef76a5301cb65 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1500,7 +1500,9 @@ def value_counts(self, dropna: bool = True) -> Series: ix = coerce_indexer_dtype(ix, self.dtype.categories) ix = self._from_backing_data(ix) - return Series(count, index=CategoricalIndex(ix), dtype="int64", name="count") + return Series( + count, index=CategoricalIndex(ix), dtype="int64", name="count", copy=False + ) # error: Argument 2 of "_empty" is incompatible with supertype # "NDArrayBackedExtensionArray"; supertype defines the argument type as @@ -1758,7 +1760,9 @@ def _values_for_rank(self): # reorder the categories (so rank can use the float codes) # instead of passing an object array to rank values = np.array( - self.rename_categories(Series(self.categories).rank().values) + self.rename_categories( + Series(self.categories, copy=False).rank().values + ) ) return values diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9b9cb3e29810d..6a88b48031d3f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1002,7 +1002,7 @@ def value_counts(self, dropna: bool = True) -> Series: ) if dropna: - res = Series(value_counts, index=keys, name="count") + res = Series(value_counts, index=keys, name="count", copy=False) res.index = res.index.astype(self.dtype) res = res.astype("Int64") return res @@ -1018,7 +1018,7 @@ def value_counts(self, dropna: bool = True) -> Series: mask = np.zeros(len(counts), dtype="bool") counts_array = IntegerArray(counts, mask) - return Series(counts_array, index=index, name="count") + return Series(counts_array, index=index, name="count", copy=False) @doc(ExtensionArray.equals) def equals(self, other) -> bool: diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 7980638deb438..b3eb5db63296e 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -219,6 +219,7 @@ def to_dense(self) -> Series: self._parent.array.to_dense(), index=self._parent.index, name=self._parent.name, + copy=False, ) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fcebd17ace2d3..9b55638b21ece 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -890,7 +890,7 @@ def value_counts(self, dropna: bool = True) -> Series: index = Index(keys) else: index = keys - return Series(counts, index=index) + return Series(counts, index=index, copy=False) # -------- # Indexing diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 723449dfcd4a3..6ac1931f1ddba 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -195,7 +195,7 @@ def coo_to_sparse_series( from pandas import SparseDtype try: - ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)), copy=False) except AttributeError as err: raise TypeError( f"Expected coo_matrix. Got {type(A).__name__} instead." diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 19fba398feb08..39cf1172403b4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -691,7 +691,7 @@ def value_counts( llab = lambda lab, inc: lab[inc] else: # lab is a Categorical with categories an IntervalIndex - cat_ser = cut(Series(val), bins, include_lowest=True) + cat_ser = cut(Series(val, copy=False), bins, include_lowest=True) cat_obj = cast("Categorical", cat_ser._values) lev = cat_obj.categories lab = lev.take( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c676999cf128d..dc7dff74369bb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1439,7 +1439,7 @@ def _agg_py_fallback( if values.ndim == 1: # For DataFrameGroupBy we only get here with ExtensionArray - ser = Series(values) + ser = Series(values, copy=False) else: # We only get here with values.dtype == object # TODO: special case not needed with ArrayManager diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 236449881dc41..b1ee176c7f34a 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -101,7 +101,7 @@ def _delegate_property_get(self, name): index = self.orig.index else: index = self._parent.index - # return the result as a Series, which is by definition a copy + # return the result as a Series result = Series(result, index=index, name=self.name).__finalize__(self._parent) # setting this object will show a SettingWithCopyWarning/Error diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index d00fade8a1c6f..b907cf346260b 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -236,7 +236,7 @@ def _get_dummies_1d( from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling - codes, levels = factorize_from_iterable(Series(data)) + codes, levels = factorize_from_iterable(Series(data, copy=False)) if dtype is None: dtype = np.dtype(bool) @@ -310,7 +310,7 @@ def get_empty_frame(data) -> DataFrame: fill_value=fill_value, dtype=dtype, ) - sparse_series.append(Series(data=sarr, index=index, name=col)) + sparse_series.append(Series(data=sarr, index=index, name=col, copy=False)) return concat(sparse_series, axis=1, copy=False) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1b020a3d96411..2acfdcefed055 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -379,7 +379,7 @@ def _get_series_list(self, others): if isinstance(others, ABCSeries): return [others] elif isinstance(others, ABCIndex): - return [Series(others._values, index=idx, dtype=others.dtype)] + return [Series(others, index=idx, dtype=others.dtype)] elif isinstance(others, ABCDataFrame): return [others[x] for x in others] elif isinstance(others, np.ndarray) and others.ndim == 2: @@ -634,7 +634,7 @@ def cat( else: dtype = self._orig.dtype res_ser = Series( - result, dtype=dtype, index=data.index, name=self._orig.name + result, dtype=dtype, index=data.index, name=self._orig.name, copy=False ) out = res_ser.__finalize__(self._orig, method="str_cat") return out diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 508ac122d67af..4055092b82361 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -376,7 +376,7 @@ def _str_get_dummies(self, sep: str = "|"): arr = sep + arr.astype(str) + sep tags: set[str] = set() - for ts in Series(arr).str.split(sep): + for ts in Series(arr, copy=False).str.split(sep): tags.update(ts) tags2 = sorted(tags - {""}) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3cd3dec185ccf..f74758b87f3a3 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -250,7 +250,7 @@ def _maybe_cache( cache_dates = convert_listlike(unique_dates, format) # GH#45319 try: - cache_array = Series(cache_dates, index=unique_dates) + cache_array = Series(cache_dates, index=unique_dates, copy=False) except OutOfBoundsDatetime: return cache_array # GH#39882 and GH#35888 in case of None and NaT we get duplicates diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index b61fa9a92539e..4dcbb4ed804db 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -730,7 +730,7 @@ def cov_func(x, y): self.ignore_na, bias, ) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only @@ -807,7 +807,7 @@ def _cov(X, Y): x_var = _cov(x_array, x_array) y_var = _cov(y_array, y_array) result = cov / zsqrt(x_var * y_var) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 97b3f23d65b44..d3637d177b7f5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -383,7 +383,7 @@ def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None: if self.on is not None and not self._on.equals(obj.index): name = self._on.name - extra_col = Series(self._on, index=self.obj.index, name=name) + extra_col = Series(self._on, index=self.obj.index, name=name, copy=False) if name in result.columns: # TODO: sure we want to overwrite results? result[name] = extra_col @@ -1413,7 +1413,7 @@ def _generate_cython_apply_func( def apply_func(values, begin, end, min_periods, raw=raw): if not raw: # GH 45912 - values = Series(values, index=self._on) + values = Series(values, index=self._on, copy=False) return window_func(values, begin, end, min_periods) return apply_func @@ -1670,7 +1670,7 @@ def cov_func(x, y): notna(x_array + y_array).astype(np.float64), start, end, 0 ) result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof)) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only @@ -1727,7 +1727,7 @@ def corr_func(x, y): ) denominator = (x_var * y_var) ** 0.5 result = numerator / denominator - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, corr_func, numeric_only diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 9542ae46a0d05..860b347b834fa 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1988,7 +1988,7 @@ def _do_convert_categoricals( # TODO: if we get a non-copying rename_categories, use that cat_data = cat_data.rename_categories(categories) except ValueError as err: - vc = Series(categories).value_counts() + vc = Series(categories, copy=False).value_counts() repeated_cats = list(vc.index[vc > 1]) repeats = "-" * 80 + "\n" + "\n".join(repeated_cats) # GH 25772 @@ -2005,7 +2005,7 @@ def _do_convert_categoricals( """ raise ValueError(msg) from err # TODO: is the next line needed above in the data(...) method? - cat_series = Series(cat_data, index=data.index) + cat_series = Series(cat_data, index=data.index, copy=False) cat_converted_data.append((col, cat_series)) else: cat_converted_data.append((col, data[col])) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index e2f30da1b839c..4181769458082 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -288,7 +288,7 @@ def _grouped_plot_by_column( ax_values.append(re_plotf) ax.grid(grid) - result = pd.Series(ax_values, index=columns) + result = pd.Series(ax_values, index=columns, copy=False) # Return axes in multiplot case, maybe revisit later # 985 if return_type is None: