
Commit

Backport PR #52068 on branch 2.0.x (CoW: Switch to copy=False everywhere for Series constructor) (#52367)

Backport PR #52068: CoW: Switch to copy=False everywhere for Series constructor

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
meeseeksmachine and phofl authored Apr 3, 2023
1 parent 9bf45db commit 5d69654
Showing 19 changed files with 34 additions and 29 deletions.
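For orientation, a minimal sketch of why the internal call sites now pass copy=False, assuming pandas 2.0.x with the copy_on_write option enabled (the configuration this backport targets): under Copy-on-Write the Series constructor makes a defensive copy of array input by default, so code that wraps an array pandas already owns opts out explicitly to avoid the extra copy. The snippet below is illustrative only and is not part of the diff.

import numpy as np
import pandas as pd

# Assumption: pandas 2.0.x with Copy-on-Write enabled.
pd.set_option("mode.copy_on_write", True)

arr = np.arange(5)

# Default construction copies the input array under CoW, so the new Series
# never shares data with an object outside pandas.
copied = pd.Series(arr)

# copy=False wraps the existing array instead; this is what the internal
# call sites in this commit rely on for arrays they already own or have
# just created.
shared = pd.Series(arr, copy=False)

print(np.shares_memory(arr, shared.values))  # expected: True
print(np.shares_memory(arr, copied.values))  # expected: False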
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
@@ -837,7 +837,7 @@ def value_counts(
     if bins is not None:
         from pandas.core.reshape.tile import cut

-        values = Series(values)
+        values = Series(values, copy=False)
         try:
             ii = cut(values, bins, include_lowest=True)
         except TypeError as err:
@@ -860,7 +860,7 @@
     else:
         if is_extension_array_dtype(values):
             # handle Categorical and sparse,
-            result = Series(values)._values.value_counts(dropna=dropna)
+            result = Series(values, copy=False)._values.value_counts(dropna=dropna)
             result.name = name
             result.index.name = index_name
             counts = result._values
@@ -892,7 +892,7 @@
                 idx = idx.astype(object)
             idx.name = index_name

-            result = Series(counts, index=idx, name=name)
+            result = Series(counts, index=idx, name=name, copy=False)

     if sort:
         result = result.sort_values(ascending=ascending)
2 changes: 1 addition & 1 deletion pandas/core/arrays/_mixins.py
@@ -448,7 +448,7 @@ def value_counts(self, dropna: bool = True) -> Series:

         index_arr = self._from_backing_data(np.asarray(result.index._data))
         index = Index(index_arr, name=result.index.name)
-        return Series(result._values, index=index, name=result.name)
+        return Series(result._values, index=index, name=result.name, copy=False)

     def _quantile(
         self: NDArrayBackedExtensionArrayT,
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
@@ -1104,7 +1104,7 @@ def value_counts(self, dropna: bool = True) -> Series:

         index = Index(type(self)(values))

-        return Series(counts, index=index, name="count")
+        return Series(counts, index=index, name="count", copy=False)

     @classmethod
     def _concat_same_type(
8 changes: 6 additions & 2 deletions pandas/core/arrays/categorical.py
@@ -1500,7 +1500,9 @@ def value_counts(self, dropna: bool = True) -> Series:
         ix = coerce_indexer_dtype(ix, self.dtype.categories)
         ix = self._from_backing_data(ix)

-        return Series(count, index=CategoricalIndex(ix), dtype="int64", name="count")
+        return Series(
+            count, index=CategoricalIndex(ix), dtype="int64", name="count", copy=False
+        )

     # error: Argument 2 of "_empty" is incompatible with supertype
     # "NDArrayBackedExtensionArray"; supertype defines the argument type as
@@ -1758,7 +1760,9 @@ def _values_for_rank(self):
             # reorder the categories (so rank can use the float codes)
             # instead of passing an object array to rank
             values = np.array(
-                self.rename_categories(Series(self.categories).rank().values)
+                self.rename_categories(
+                    Series(self.categories, copy=False).rank().values
+                )
             )
         return values

4 changes: 2 additions & 2 deletions pandas/core/arrays/masked.py
@@ -1002,7 +1002,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         )

         if dropna:
-            res = Series(value_counts, index=keys, name="count")
+            res = Series(value_counts, index=keys, name="count", copy=False)
             res.index = res.index.astype(self.dtype)
             res = res.astype("Int64")
             return res
@@ -1018,7 +1018,7 @@
         mask = np.zeros(len(counts), dtype="bool")
         counts_array = IntegerArray(counts, mask)

-        return Series(counts_array, index=index, name="count")
+        return Series(counts_array, index=index, name="count", copy=False)

     @doc(ExtensionArray.equals)
     def equals(self, other) -> bool:
1 change: 1 addition & 0 deletions pandas/core/arrays/sparse/accessor.py
@@ -219,6 +219,7 @@ def to_dense(self) -> Series:
             self._parent.array.to_dense(),
             index=self._parent.index,
             name=self._parent.name,
+            copy=False,
         )


2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -890,7 +890,7 @@ def value_counts(self, dropna: bool = True) -> Series:
             index = Index(keys)
         else:
             index = keys
-        return Series(counts, index=index)
+        return Series(counts, index=index, copy=False)

     # --------
     # Indexing
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/scipy_sparse.py
@@ -195,7 +195,7 @@ def coo_to_sparse_series(
     from pandas import SparseDtype

     try:
-        ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
+        ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)), copy=False)
     except AttributeError as err:
         raise TypeError(
             f"Expected coo_matrix. Got {type(A).__name__} instead."
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
@@ -691,7 +691,7 @@ def value_counts(
             llab = lambda lab, inc: lab[inc]
         else:
             # lab is a Categorical with categories an IntervalIndex
-            cat_ser = cut(Series(val), bins, include_lowest=True)
+            cat_ser = cut(Series(val, copy=False), bins, include_lowest=True)
             cat_obj = cast("Categorical", cat_ser._values)
             lev = cat_obj.categories
             lab = lev.take(
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
@@ -1439,7 +1439,7 @@ def _agg_py_fallback(

         if values.ndim == 1:
             # For DataFrameGroupBy we only get here with ExtensionArray
-            ser = Series(values)
+            ser = Series(values, copy=False)
         else:
             # We only get here with values.dtype == object
             # TODO: special case not needed with ArrayManager
2 changes: 1 addition & 1 deletion pandas/core/indexes/accessors.py
@@ -101,7 +101,7 @@ def _delegate_property_get(self, name):
             index = self.orig.index
         else:
             index = self._parent.index
-        # return the result as a Series, which is by definition a copy
+        # return the result as a Series
         result = Series(result, index=index, name=self.name).__finalize__(self._parent)

         # setting this object will show a SettingWithCopyWarning/Error
4 changes: 2 additions & 2 deletions pandas/core/reshape/encoding.py
@@ -236,7 +236,7 @@ def _get_dummies_1d(
     from pandas.core.reshape.concat import concat

     # Series avoids inconsistent NaN handling
-    codes, levels = factorize_from_iterable(Series(data))
+    codes, levels = factorize_from_iterable(Series(data, copy=False))

     if dtype is None:
         dtype = np.dtype(bool)
@@ -310,7 +310,7 @@ def get_empty_frame(data) -> DataFrame:
                 fill_value=fill_value,
                 dtype=dtype,
             )
-            sparse_series.append(Series(data=sarr, index=index, name=col))
+            sparse_series.append(Series(data=sarr, index=index, name=col, copy=False))

         return concat(sparse_series, axis=1, copy=False)

4 changes: 2 additions & 2 deletions pandas/core/strings/accessor.py
@@ -379,7 +379,7 @@ def _get_series_list(self, others):
         if isinstance(others, ABCSeries):
             return [others]
         elif isinstance(others, ABCIndex):
-            return [Series(others._values, index=idx, dtype=others.dtype)]
+            return [Series(others, index=idx, dtype=others.dtype)]
         elif isinstance(others, ABCDataFrame):
             return [others[x] for x in others]
         elif isinstance(others, np.ndarray) and others.ndim == 2:
@@ -634,7 +634,7 @@ def cat(
             else:
                 dtype = self._orig.dtype
             res_ser = Series(
-                result, dtype=dtype, index=data.index, name=self._orig.name
+                result, dtype=dtype, index=data.index, name=self._orig.name, copy=False
             )
             out = res_ser.__finalize__(self._orig, method="str_cat")
             return out
2 changes: 1 addition & 1 deletion pandas/core/strings/object_array.py
@@ -376,7 +376,7 @@ def _str_get_dummies(self, sep: str = "|"):
             arr = sep + arr.astype(str) + sep

         tags: set[str] = set()
-        for ts in Series(arr).str.split(sep):
+        for ts in Series(arr, copy=False).str.split(sep):
             tags.update(ts)
         tags2 = sorted(tags - {""})

2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
@@ -250,7 +250,7 @@ def _maybe_cache(
             cache_dates = convert_listlike(unique_dates, format)
             # GH#45319
             try:
-                cache_array = Series(cache_dates, index=unique_dates)
+                cache_array = Series(cache_dates, index=unique_dates, copy=False)
             except OutOfBoundsDatetime:
                 return cache_array
             # GH#39882 and GH#35888 in case of None and NaT we get duplicates
4 changes: 2 additions & 2 deletions pandas/core/window/ewm.py
@@ -730,7 +730,7 @@ def cov_func(x, y):
                 self.ignore_na,
                 bias,
             )
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)

         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
@@ -807,7 +807,7 @@ def _cov(X, Y):
                 x_var = _cov(x_array, x_array)
                 y_var = _cov(y_array, y_array)
                 result = cov / zsqrt(x_var * y_var)
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)

         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
8 changes: 4 additions & 4 deletions pandas/core/window/rolling.py
@@ -383,7 +383,7 @@ def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:

         if self.on is not None and not self._on.equals(obj.index):
             name = self._on.name
-            extra_col = Series(self._on, index=self.obj.index, name=name)
+            extra_col = Series(self._on, index=self.obj.index, name=name, copy=False)
             if name in result.columns:
                 # TODO: sure we want to overwrite results?
                 result[name] = extra_col
@@ -1413,7 +1413,7 @@ def _generate_cython_apply_func(
         def apply_func(values, begin, end, min_periods, raw=raw):
             if not raw:
                 # GH 45912
-                values = Series(values, index=self._on)
+                values = Series(values, index=self._on, copy=False)
             return window_func(values, begin, end, min_periods)

         return apply_func
@@ -1670,7 +1670,7 @@ def cov_func(x, y):
                     notna(x_array + y_array).astype(np.float64), start, end, 0
                 )
                 result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)

         return self._apply_pairwise(
             self._selected_obj, other, pairwise, cov_func, numeric_only
@@ -1727,7 +1727,7 @@ def corr_func(x, y):
                 )
                 denominator = (x_var * y_var) ** 0.5
                 result = numerator / denominator
-            return Series(result, index=x.index, name=x.name)
+            return Series(result, index=x.index, name=x.name, copy=False)

         return self._apply_pairwise(
             self._selected_obj, other, pairwise, corr_func, numeric_only
4 changes: 2 additions & 2 deletions pandas/io/stata.py
@@ -1988,7 +1988,7 @@ def _do_convert_categoricals(
                     # TODO: if we get a non-copying rename_categories, use that
                     cat_data = cat_data.rename_categories(categories)
                 except ValueError as err:
-                    vc = Series(categories).value_counts()
+                    vc = Series(categories, copy=False).value_counts()
                     repeated_cats = list(vc.index[vc > 1])
                     repeats = "-" * 80 + "\n" + "\n".join(repeated_cats)
                     # GH 25772
@@ -2005,7 +2005,7 @@
 """
                     raise ValueError(msg) from err
                 # TODO: is the next line needed above in the data(...) method?
-                cat_series = Series(cat_data, index=data.index)
+                cat_series = Series(cat_data, index=data.index, copy=False)
                 cat_converted_data.append((col, cat_series))
             else:
                 cat_converted_data.append((col, data[col]))
2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/boxplot.py
@@ -288,7 +288,7 @@ def _grouped_plot_by_column(
         ax_values.append(re_plotf)
         ax.grid(grid)

-    result = pd.Series(ax_values, index=columns)
+    result = pd.Series(ax_values, index=columns, copy=False)

     # Return axes in multiplot case, maybe revisit later # 985
     if return_type is None:
