Skip to content

Fix scalar iloc rebase #17163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,3 +1085,24 @@ def cast_scalar_to_array(shape, value, dtype=None):
values.fill(fill_value)

return values


def _maybe_convert_indexer(indexer, until):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is likely duplicating some code in pandas.core.indexing. Also doesn't belong here as its purely an indexing converter (so put it in pandas.core.indexing)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also doesn't belong here as its purely an indexing converter (so put it in pandas.core.indexing)

I will use it also in BlockManager: can you confirm you suggestion is still valid?

Copy link
Contributor

@jreback jreback Aug 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's fine. it just belongs in internal utlities related to indexing (and not internal casting type things). yes it is casting but is purely related to indexing (and not data types)

"""
Convert slice, tuple, list or scalar "indexer" to 1-d array of indices,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be ok to first do a PR to add a level to our indexing structure of code, IOW

pandas.core.indexing -> pandas.core.indexing.indexing, then you add other help modules as needed
e.g. pands.core.indexing.cast for example (where things like this could live).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for completeness: recall that part of this (i.e. the InfoCleaner) is temporary, until I finish the work on BlockManager (while _maybe_convert_to_indexer will stay - it is used by the BlockManager too - as well as part of the new _setter). In the long run, I expect the amount and complexity of indexing code to actually decrease. So I'm a bit against more hierarchy. Actually, if you find this PR too messy, I'd rather close it and just wait for the work on BlockManager (which however will take more time, so the bug will have to wait).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I'd rather not introduce a 'temporary' fix.

using "until" as maximum for upwards open slices.
"""

if is_scalar(indexer):
return np.array([indexer], dtype=int)

if isinstance(indexer, np.ndarray):
if indexer.dtype == bool:
return np.where(indexer)[0]
return indexer

if isinstance(indexer, slice):
stop = until if indexer.stop is None else indexer.stop
return np.arange(stop, dtype=int)[indexer]

return np.array(indexer, dtype=int)
138 changes: 105 additions & 33 deletions pandas/core/indexing.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
_is_unorderable_exception,
_ensure_platform_int)
from pandas.core.dtypes.missing import isna, _infer_fill_value
from pandas.core.dtypes.cast import _maybe_convert_indexer

from pandas.core.index import Index, MultiIndex

Expand Down Expand Up @@ -81,6 +82,24 @@ def __getitem__(self, arg):
IndexSlice = _IndexSlice()


class InfoCleaner:
    """
    Context manager that temporarily replaces the labels of the "info"
    axis with a plain positional range, restoring the original labels
    on exit (even when the body raises).

    Lets callers index the wrapped object purely by position, without
    ambiguity from duplicate labels on the info axis.
    """

    def __init__(self, obj):
        self._obj = obj
        axis_num = obj._info_axis_number
        self._axis_name = obj._AXIS_NAMES[axis_num]

    def __enter__(self):
        # Stash the real labels, then substitute 0..n-1 positions.
        self._saved_labels = getattr(self._obj, self._axis_name)
        placeholder = range(len(self._saved_labels))
        setattr(self._obj, self._axis_name, placeholder)

    def __exit__(self, *exc_info):
        # Put the original labels back regardless of what happened.
        setattr(self._obj, self._axis_name, self._saved_labels)


class IndexingError(Exception):
pass

Expand Down Expand Up @@ -492,29 +511,10 @@ def _setitem_with_indexer(self, indexer, value):
else:
lplane_indexer = 0

def setter(item, v):
s = self.obj[item]
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

# perform the equivalent of a setitem on the info axis
# as we have a null slice or a slice with full bounds
# which means essentially reassign to the columns of a
# multi-dim object
# GH6149 (null slice), GH10408 (full bounds)
if (isinstance(pi, tuple) and
all(is_null_slice(idx) or
is_full_slice(idx, len(self.obj))
for idx in pi)):
s = v
else:
# set the item, possibly having a dtype change
s._consolidate_inplace()
s = s.copy()
s._data = s._data.setitem(indexer=pi, value=v)
s._maybe_update_cacher(clear=True)

# reset the sliced object if unique
self.obj[item] = s
setter_kwargs = {'items': labels,
'indexer': indexer,
'pi': plane_indexer[0] if lplane_indexer == 1
else plane_indexer}

def can_do_equal_len():
""" return True if we have an equal len settable """
Expand Down Expand Up @@ -542,7 +542,7 @@ def can_do_equal_len():
sub_indexer = list(indexer)
multiindex_indexer = isinstance(labels, MultiIndex)

for item in labels:
for idx, item in enumerate(labels):
if item in value:
sub_indexer[info_axis] = item
v = self._align_series(
Expand All @@ -551,7 +551,7 @@ def can_do_equal_len():
else:
v = np.nan

setter(item, v)
self._setter(idx, v, force_loc=True, **setter_kwargs)

# we have an equal len ndarray/convertible to our labels
elif np.array(value).ndim == 2:
Expand All @@ -563,14 +563,15 @@ def can_do_equal_len():
raise ValueError('Must have equal len keys and value '
'when setting with an ndarray')

for i, item in enumerate(labels):
for i in range(len(labels)):

# setting with a list, re-coerces
setter(item, value[:, i].tolist())
self._setter(i, value[:, i].tolist(), force_loc=True,
**setter_kwargs)

# we have an equal len list/ndarray
elif can_do_equal_len():
setter(labels[0], value)
self._setter(0, value, **setter_kwargs)

# per label values
else:
Expand All @@ -579,13 +580,12 @@ def can_do_equal_len():
raise ValueError('Must have equal len keys and value '
'when setting with an iterable')

for item, v in zip(labels, value):
setter(item, v)
for i, v in zip(range(len(labels)), value):
self._setter(i, v, **setter_kwargs)
else:

# scalar
for item in labels:
setter(item, value)
for idx in range(len(labels)):
self._setter(idx, value, **setter_kwargs)

else:
if isinstance(indexer, tuple):
Expand Down Expand Up @@ -619,6 +619,47 @@ def can_do_equal_len():
value=value)
self.obj._maybe_update_cacher(clear=True)

def _setter(self, idx, v, items, pi, **kwargs):
    """
    Set a single value on the underlying object. Label-based.

    Parameters
    ----------
    idx : int
        The index of the desired element inside "items".

    v : any
        The value to assign to the specified location.

    items : list
        A list of labels on the info axis.

    pi : tuple or list-like
        Components of the original indexer preceding the info axis.

    **kwargs
        Accepted (and ignored) so subclasses can forward extra state.
    """
    # Resolve the label and slice out the corresponding 1-d object.
    item = items[idx]
    s = self.obj[item]

    # perform the equivalent of a setitem on the info axis
    # as we have a null slice or a slice with full bounds
    # which means essentially reassign to the columns of a
    # multi-dim object
    # GH6149 (null slice), GH10408 (full bounds)
    if (isinstance(pi, tuple) and
            all(is_null_slice(ix) or
                is_full_slice(ix, len(self.obj))
                for ix in pi)):
        s = v
    else:
        # set the item, possibly having a dtype change; copy first so
        # the write does not mutate a view shared with other columns
        s._consolidate_inplace()
        s = s.copy()
        s._data = s._data.setitem(indexer=pi, value=v)
        s._maybe_update_cacher(clear=True)

    # reset the sliced object if unique
    self.obj[item] = s

def _align_series(self, indexer, ser, multiindex_indexer=False):
"""
Parameters
Expand Down Expand Up @@ -1772,6 +1813,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
raise ValueError("Can only index by location with a [%s]" %
self._valid_types)

def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
    """
    Set a single value on the underlying object. Position-based by default.

    Parameters
    ----------
    idx : int
        The index of the desired element.

    v : any
        The value to assign to the specified location.

    indexer : list
        The original (positional) indexer.

    force_loc : bool
        If True, use location-based indexing via the parent class
        directly, skipping the positional translation below.

    Other keyword arguments are forwarded to _NDFrameIndexer._setter()
    """

    if force_loc:
        super(_iLocIndexer, self)._setter(idx, v, **kwargs)
    else:
        # Translate the info-axis component of the positional indexer
        # into explicit integer positions, then temporarily swap the
        # info-axis labels for a positional range so the parent's
        # label-based setter resolves positions unambiguously
        # (e.g. in the presence of duplicate labels).
        info_axis = self.obj._info_axis_number
        max_idx = len(self.obj._get_axis(info_axis))
        kwargs['items'] = _maybe_convert_indexer(indexer[info_axis],
                                                 max_idx)
        with InfoCleaner(self.obj):
            super(_iLocIndexer, self)._setter(idx, v, **kwargs)


class _ScalarAccessIndexer(_NDFrameIndexer):
""" access scalars quickly """
Expand Down
36 changes: 34 additions & 2 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,33 @@ def test_iloc_setitem_dups(self):
# iloc with a mask aligning from another iloc
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
df = concat([df1, df2], axis=1)
df_orig = concat([df1, df2], axis=1)
df = df_orig.copy()

# GH 15686
# iloc with mask, duplicated index and multiple blocks
expected = df.fillna(3)
expected['A'] = expected['A'].astype('float64')
expected.iloc[:, 0] = expected.iloc[:, 0].astype('float64')
inds = np.isnan(df.iloc[:, 0])
mask = inds[inds].index
df.iloc[mask, 0] = df.iloc[mask, 2]
tm.assert_frame_equal(df, expected)

# GH 15686
# iloc with scalar, duplicated index and multiple blocks
df = df_orig.copy()
expected = df.fillna(15)
df.iloc[0, 0] = 15
tm.assert_frame_equal(df, expected)

# GH 15686
# iloc with repeated value, duplicated index and multiple blocks
df = df_orig.copy()
expected = concat([DataFrame([{'A': 15, 'B': 1}, {'A': 15, 'B': 2}]),
df2], axis=1)
df.iloc[:, 0] = 15
tm.assert_frame_equal(df, expected)

# del a dup column across blocks
expected = DataFrame({0: [1, 2], 1: [3, 4]})
expected.columns = ['B', 'B']
Expand All @@ -327,6 +345,20 @@ def test_iloc_setitem_dups(self):
drop=True)
tm.assert_frame_equal(df, expected)

@pytest.mark.xfail(reason="BlockManager.setitem() broken")
def test_iloc_setitem_dups_slice(self):
    # GH 12991
    # Writing through a positional slice of a frame with duplicate
    # column labels must not leak into the other duplicate column.
    df = DataFrame(index=['a', 'b', 'c'],
                   columns=['d', 'e', 'd']).fillna(0)
    # snapshot the second 'd' column before mutating the first one
    expected = df.iloc[:, 2].copy(deep=True)

    # mutate the first 'd' column via a positional slice
    first_col = df.iloc[:, 0]
    first_col['a'] = 3

    result = df.iloc[:, 2]

    # the second duplicate column must be unchanged
    tm.assert_series_equal(result, expected)

def test_iloc_getitem_frame(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
columns=lrange(0, 8, 2))
Expand Down