Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: mask values in loc.__setitem__ with bool indexer #45501

Merged
merged 10 commits into from
Jan 31, 2022
73 changes: 71 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
is_empty_indexer,
is_exact_shape_match,
is_list_like_indexer,
is_scalar_indexer,
length_of_indexer,
)
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -669,6 +670,71 @@ def _get_setitem_indexer(self, key):

return self._convert_to_indexer(key, axis=0)

@final
def _maybe_mask_setitem_value(self, indexer, value):
"""
If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
similar to Series.__setitem__.

Note this is only for loc, not iloc.
"""

if (
isinstance(indexer, tuple)
and len(indexer) == 2
and isinstance(value, (ABCSeries, ABCDataFrame))
):
pi, icols = indexer
ndim = value.ndim
if com.is_bool_indexer(pi) and len(value) == len(pi):
newkey = pi.nonzero()[0]

if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
# e.g. test_loc_setitem_boolean_mask_allfalse
if len(newkey) == 0:
# FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
# TODO(GH#45333): may be fixed when deprecation is enforced

value = value.iloc[:0]
else:
# test_loc_setitem_ndframe_values_alignment
value = self.obj.iloc._align_series(indexer, value)
indexer = (newkey, icols)

elif (
isinstance(icols, np.ndarray)
and icols.dtype.kind == "i"
and len(icols) == 1
):
if ndim == 1:
# We implicitly broadcast, though numpy does not, see
# github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
if len(newkey) == 0:
# FIXME: kludge for
# test_setitem_loc_only_false_indexer_dtype_changed
# TODO(GH#45333): may be fixed when deprecation is enforced
value = value.iloc[:0]
else:
# test_loc_setitem_ndframe_values_alignment
value = self.obj.iloc._align_series(indexer, value)
indexer = (newkey, icols)

elif ndim == 2 and value.shape[1] == 1:
if len(newkey) == 0:
# FIXME: kludge for
# test_loc_setitem_all_false_boolean_two_blocks
# TODO(GH#45333): may be fixed when deprecation is enforced
value = value.iloc[:0]
else:
# test_loc_setitem_ndframe_values_alignment
value = self.obj.iloc._align_frame(indexer, value)
indexer = (newkey, icols)
elif com.is_bool_indexer(indexer):
indexer = indexer.nonzero()[0]

return indexer, value

@final
def _tupleize_axis_indexer(self, key) -> tuple:
"""
Expand Down Expand Up @@ -1299,8 +1365,7 @@ def _convert_to_indexer(self, key, axis: int):

if com.is_bool_indexer(key):
key = check_bool_indexer(labels, key)
(inds,) = key.nonzero()
return inds
return key
else:
return self._get_listlike_indexer(key, axis)[1]
else:
Expand Down Expand Up @@ -1696,6 +1761,10 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
self._setitem_with_indexer_missing(indexer, value)
return

if name == "loc":
# must come after setting of missing
indexer, value = self._maybe_mask_setitem_value(indexer, value)

# align and set the values
if take_split_path:
# We have to operate column-wise
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ def setitem(self, indexer, value):

if is_empty_indexer(indexer):
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
pass
values[indexer] = value

elif is_scalar_indexer(indexer, self.ndim):
# setting a single element for each dim and with a rhs that could
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2446,6 +2446,31 @@ def test_loc_setitem_boolean_and_column(self, float_frame):

tm.assert_frame_equal(float_frame, expected)

def test_loc_setitem_ndframe_values_alignment(self):
    # GH#45501
    # Each case sets only the last row of column "a" through a boolean mask,
    # using a right-hand side whose index is in reverse order ([2, 1, 0]).
    # The expected frame has 10 (the RHS entry labelled 2) in that row, i.e.
    # the RHS is matched by label rather than by position.
    rows = [False, False, True]
    rhs_labels = [2, 1, 0]
    expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]})

    def fresh_frame():
        return DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # DataFrame RHS, list-like column key
    frame = fresh_frame()
    frame.loc[rows, ["a"]] = DataFrame({"a": [10, 20, 30]}, index=rhs_labels)
    tm.assert_frame_equal(frame, expected)

    # same thing with Series RHS
    frame = fresh_frame()
    frame.loc[rows, ["a"]] = Series([10, 11, 12], index=rhs_labels)
    tm.assert_frame_equal(frame, expected)

    # same thing but setting "a" instead of ["a"]
    frame = fresh_frame()
    frame.loc[rows, "a"] = Series([10, 11, 12], index=rhs_labels)
    tm.assert_frame_equal(frame, expected)

    # Series.loc on a column pulled out of the frame; the assertion is made
    # on the parent frame, so the write is expected to be visible there.
    frame = fresh_frame()
    column = frame["a"]
    column.loc[rows] = Series([10, 11, 12], index=rhs_labels)
    tm.assert_frame_equal(frame, expected)


class TestLocListlike:
@pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])
Expand Down