Skip to content

BUG: ExtensionBlock.set not setting values inplace #32831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 27 additions & 25 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import ArrayLike
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -340,11 +341,12 @@ def iget(self, i):

def set(self, locs, values):
"""
Modify Block in-place with new item value
Modify block values in-place with new item value.

Returns
-------
None
Notes
-----
`set` never creates a new array or new Block, whereas `setitem` _may_
create a new array and always creates a new Block.
"""
self.values[locs] = values

Expand Down Expand Up @@ -793,7 +795,7 @@ def _replace_single(self, *args, **kwargs):

def setitem(self, indexer, value):
"""
Set the value inplace, returning a a maybe different typed block.
Attempt self.values[indexer] = value, possibly creating a new array.

Parameters
----------
Expand Down Expand Up @@ -1635,12 +1637,15 @@ def iget(self, col):
raise IndexError(f"{self} only contains one item")
return self.values

def should_store(self, value):
def should_store(self, value: ArrayLike) -> bool:
"""
Can we set the given array-like value inplace?
"""
return isinstance(value, self._holder)

def set(self, locs, values, check=False):
def set(self, locs, values):
assert locs.tolist() == [0]
self.values = values
self.values[:] = values

def putmask(
self, mask, new, align=True, inplace=False, axis=0, transpose=False,
Expand Down Expand Up @@ -1751,7 +1756,7 @@ def is_numeric(self):

def setitem(self, indexer, value):
"""
Set the value inplace, returning a same-typed block.
Attempt self.values[indexer] = value, possibly creating a new array.

This differs from Block.setitem by not allowing setitem to change
the dtype of the Block.
Expand Down Expand Up @@ -2057,7 +2062,7 @@ def to_native_types(
)
return formatter.get_result_as_array()

def should_store(self, value) -> bool:
def should_store(self, value: ArrayLike) -> bool:
# when inserting a column should not coerce integers to floats
# unnecessarily
return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype
Expand All @@ -2075,7 +2080,7 @@ def _can_hold_element(self, element: Any) -> bool:
element, (float, int, complex, np.float_, np.int_)
) and not isinstance(element, (bool, np.bool_))

def should_store(self, value) -> bool:
def should_store(self, value: ArrayLike) -> bool:
return issubclass(value.dtype.type, np.complexfloating)


Expand All @@ -2094,7 +2099,7 @@ def _can_hold_element(self, element: Any) -> bool:
)
return is_integer(element)

def should_store(self, value) -> bool:
def should_store(self, value: ArrayLike) -> bool:
return is_integer_dtype(value) and value.dtype == self.dtype


Expand All @@ -2105,6 +2110,9 @@ class DatetimeLikeBlockMixin:
def _holder(self):
return DatetimeArray

def should_store(self, value):
return is_dtype_equal(self.dtype, value.dtype)

@property
def fill_value(self):
return np.datetime64("NaT", "ns")
Expand Down Expand Up @@ -2241,16 +2249,9 @@ def to_native_types(
).reshape(i8values.shape)
return np.atleast_2d(result)

def should_store(self, value) -> bool:
return is_datetime64_dtype(value.dtype)

def set(self, locs, values):
"""
Modify Block in-place with new item value

Returns
-------
None
See Block.set.__doc__
"""
values = conversion.ensure_datetime64ns(values, copy=False)

Expand All @@ -2274,6 +2275,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
_can_hold_element = DatetimeBlock._can_hold_element
to_native_types = DatetimeBlock.to_native_types
fill_value = np.datetime64("NaT", "ns")
should_store = DatetimeBlock.should_store

@property
def _holder(self):
Expand Down Expand Up @@ -2483,9 +2485,6 @@ def fillna(self, value, **kwargs):
)
return super().fillna(value, **kwargs)

def should_store(self, value) -> bool:
return is_timedelta64_dtype(value.dtype)

def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
values = self.values
Expand Down Expand Up @@ -2527,7 +2526,7 @@ def _can_hold_element(self, element: Any) -> bool:
return issubclass(tipo.type, np.bool_)
return isinstance(element, (bool, np.bool_))

def should_store(self, value) -> bool:
def should_store(self, value: ArrayLike) -> bool:
return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype(
value
)
Expand Down Expand Up @@ -2619,7 +2618,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
def _can_hold_element(self, element: Any) -> bool:
return True

def should_store(self, value) -> bool:
def should_store(self, value: ArrayLike) -> bool:
return not (
issubclass(
value.dtype.type,
Expand Down Expand Up @@ -2868,6 +2867,9 @@ def __init__(self, values, placement, ndim=None):
def _holder(self):
return Categorical

def should_store(self, arr: ArrayLike):
return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype)

def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
values = self.values
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,17 @@ def test_series_indexing_zerodim_np_array(self):
result = s.iloc[np.array(0)]
assert result == 1

def test_iloc_setitem_categorical_updates_inplace(self):
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer
cat = pd.Categorical(["A", "B", "C"])
df = pd.DataFrame({1: cat, 2: [1, 2, 3]})

# This should modify our original values in-place
df.iloc[:, 0] = cat[::-1]

expected = pd.Categorical(["C", "B", "A"])
tm.assert_categorical_equal(cat, expected)


class TestILocSetItemDuplicateColumns:
def test_iloc_setitem_scalar_duplicate_columns(self):
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,23 @@ def test_binop_other(self, op, value, dtype):
tm.assert_series_equal(result, expected)


class TestShouldStore:
def test_should_store_categorical(self):
cat = pd.Categorical(["A", "B", "C"])
df = pd.DataFrame(cat)
blk = df._data.blocks[0]

# matching dtype
assert blk.should_store(cat)
assert blk.should_store(cat[:-1])

# different dtype
assert not blk.should_store(cat.as_ordered())

# ndarray instead of Categorical
assert not blk.should_store(np.asarray(cat))


@pytest.mark.parametrize(
"typestr, holder",
[
Expand Down