From 84efa459d13e53be799836d5010ae3aad57e16fe Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Tue, 14 Mar 2017 12:57:14 +0100
Subject: [PATCH 1/3] BUG: Fix iloc with duplicate labels

closes #15686
---
 pandas/core/dtypes/cast.py |  21 ++++++
 pandas/core/indexing.py    | 138 ++++++++++++++++++++++++++++---------
 2 files changed, 126 insertions(+), 33 deletions(-)
 mode change 100755 => 100644 pandas/core/indexing.py

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 723e4f70da4e9..537c929451d69 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1085,3 +1085,24 @@ def cast_scalar_to_array(shape, value, dtype=None):
     values.fill(fill_value)
 
     return values
+
+
+def _maybe_convert_indexer(indexer, until):
+    """
+    Convert slice, tuple, list or scalar "indexer" to 1-d array of indices,
+    using "until" as maximum for upwards open slices.
+    """
+
+    if is_scalar(indexer):
+        return np.array([indexer], dtype=int)
+
+    if isinstance(indexer, np.ndarray):
+        if indexer.dtype == bool:
+            return np.where(indexer)[0]
+        return indexer
+
+    if isinstance(indexer, slice):
+        stop = until if indexer.stop is None else indexer.stop
+        return np.arange(stop, dtype=int)[indexer]
+
+    return np.array(indexer, dtype=int)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
old mode 100755
new mode 100644
index 109183827de4e..75888fe25fd74
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -16,6 +16,7 @@
     _is_unorderable_exception,
     _ensure_platform_int)
 from pandas.core.dtypes.missing import isna, _infer_fill_value
+from pandas.core.dtypes.cast import _maybe_convert_indexer
 
 from pandas.core.index import Index, MultiIndex
 
@@ -81,6 +82,24 @@ def __getitem__(self, arg):
 IndexSlice = _IndexSlice()
 
 
+class InfoCleaner:
+    """
+    A context manager which temporarily removes labels on the "info" axis,
+    replacing them with a RangeIndex, and then puts them back in place.
+    Used to unambiguously index by position.
+    """
+    def __init__(self, obj):
+        self._obj = obj
+        self._info_axis = self._obj._AXIS_NAMES[self._obj._info_axis_number]
+
+    def __enter__(self):
+        self._old_col = getattr(self._obj, self._info_axis)
+        setattr(self._obj, self._info_axis, range(len(self._old_col)))
+
+    def __exit__(self, *args):
+        setattr(self._obj, self._info_axis, self._old_col)
+
+
 class IndexingError(Exception):
     pass
 
@@ -492,29 +511,10 @@ def _setitem_with_indexer(self, indexer, value):
                 else:
                     lplane_indexer = 0
 
-            def setter(item, v):
-                s = self.obj[item]
-                pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
-
-                # perform the equivalent of a setitem on the info axis
-                # as we have a null slice or a slice with full bounds
-                # which means essentially reassign to the columns of a
-                # multi-dim object
-                # GH6149 (null slice), GH10408 (full bounds)
-                if (isinstance(pi, tuple) and
-                        all(is_null_slice(idx) or
-                            is_full_slice(idx, len(self.obj))
-                            for idx in pi)):
-                    s = v
-                else:
-                    # set the item, possibly having a dtype change
-                    s._consolidate_inplace()
-                    s = s.copy()
-                    s._data = s._data.setitem(indexer=pi, value=v)
-                    s._maybe_update_cacher(clear=True)
-
-                # reset the sliced object if unique
-                self.obj[item] = s
+            setter_kwargs = {'items': labels,
+                             'indexer': indexer,
+                             'pi': plane_indexer[0] if lplane_indexer == 1
+                             else plane_indexer}
 
             def can_do_equal_len():
                 """ return True if we have an equal len settable """
@@ -542,7 +542,7 @@ def can_do_equal_len():
                     sub_indexer = list(indexer)
                     multiindex_indexer = isinstance(labels, MultiIndex)
 
-                    for item in labels:
+                    for idx, item in enumerate(labels):
                         if item in value:
                             sub_indexer[info_axis] = item
                             v = self._align_series(
@@ -551,7 +551,7 @@ def can_do_equal_len():
                         else:
                             v = np.nan
 
-                        setter(item, v)
+                        self._setter(idx, v, force_loc=True, **setter_kwargs)
 
                 # we have an equal len ndarray/convertible to our labels
                 elif np.array(value).ndim == 2:
@@ -563,14 +563,15 @@ def can_do_equal_len():
                         raise ValueError('Must have equal len keys and value '
                                          'when setting with an ndarray')
 
-                    for i, item in enumerate(labels):
+                    for i in range(len(labels)):
 
                         # setting with a list, recoerces
-                        setter(item, value[:, i].tolist())
+                        self._setter(i, value[:, i].tolist(), force_loc=True,
+                                     **setter_kwargs)
 
                 # we have an equal len list/ndarray
                 elif can_do_equal_len():
-                    setter(labels[0], value)
+                    self._setter(0, value, **setter_kwargs)
 
                 # per label values
                 else:
@@ -579,13 +580,12 @@ def can_do_equal_len():
                         raise ValueError('Must have equal len keys and value '
                                          'when setting with an iterable')
 
-                    for item, v in zip(labels, value):
-                        setter(item, v)
+                    for i, v in zip(range(len(labels)), value):
+                        self._setter(i, v, **setter_kwargs)
             else:
-
                 # scalar
-                for item in labels:
-                    setter(item, value)
+                for idx in range(len(labels)):
+                    self._setter(idx, value, **setter_kwargs)
 
         else:
             if isinstance(indexer, tuple):
@@ -619,6 +619,47 @@ def can_do_equal_len():
                                                     value=value)
             self.obj._maybe_update_cacher(clear=True)
 
+    def _setter(self, idx, v, items, pi, **kwargs):
+        """
+        Set a single value on the underlying object. Label-based.
+
+        Parameters
+        ----------
+        idx : int
+            The index of the desired element inside "items"
+
+        v : any
+            The value to assign to the specified location
+
+        items: list
+            A list of labels
+
+        pi: tuple or list-like
+            Components of original indexer preceding the info axis
+        """
+        item = items[idx]
+        s = self.obj[item]
+
+        # perform the equivalent of a setitem on the info axis
+        # as we have a null slice or a slice with full bounds
+        # which means essentially reassign to the columns of a
+        # multi-dim object
+        # GH6149 (null slice), GH10408 (full bounds)
+        if (isinstance(pi, tuple) and
+                all(is_null_slice(ix) or
+                    is_full_slice(ix, len(self.obj))
+                    for ix in pi)):
+            s = v
+        else:
+            # set the item, possibly having a dtype change
+            s._consolidate_inplace()
+            s = s.copy()
+            s._data = s._data.setitem(indexer=pi, value=v)
+            s._maybe_update_cacher(clear=True)
+
+        # reset the sliced object if unique
+        self.obj[item] = s
+
     def _align_series(self, indexer, ser, multiindex_indexer=False):
         """
         Parameters
@@ -1772,6 +1813,37 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
         raise ValueError("Can only index by location with a [%s]" %
                          self._valid_types)
 
+    def _setter(self, idx, v, indexer, force_loc=False, **kwargs):
+        """
+        Set a single value on the underlying object. Position-based by default.
+
+        Parameters
+        ----------
+        idx : int
+            The index of the desired element
+
+        v : any
+            The value to assign to the specified location
+
+        indexer: list
+            The original indexer
+
+        force_loc: bool
+            If True, use location-based indexing.
+
+        Other keyword arguments are forwarded to _NDFrameIndexer._setter()
+        """
+
+        if force_loc:
+            super(_iLocIndexer, self)._setter(idx, v, **kwargs)
+        else:
+            info_axis = self.obj._info_axis_number
+            max_idx = len(self.obj._get_axis(info_axis))
+            kwargs['items'] = _maybe_convert_indexer(indexer[info_axis],
+                                                     max_idx)
+            with InfoCleaner(self.obj):
+                super(_iLocIndexer, self)._setter(idx, v, **kwargs)
+
 
 class _ScalarAccessIndexer(_NDFrameIndexer):
     """ access scalars quickly """

From 4a5d18d7a5f1a0cd62972ea6cadea466212e5a25 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Tue, 14 Mar 2017 16:48:55 +0100
Subject: [PATCH 2/3] TST: fixed one tests and added two more for #15686

---
 pandas/tests/indexing/test_iloc.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 31fee303a41e2..64c12a472f71e 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -301,15 +301,33 @@ def test_iloc_setitem_dups(self):
         # iloc with a mask aligning from another iloc
         df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
         df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
-        df = concat([df1, df2], axis=1)
+        df_orig = concat([df1, df2], axis=1)
+        df = df_orig.copy()
 
+        # GH 15686
+        # iloc with mask, duplicated index and multiple blocks
         expected = df.fillna(3)
-        expected['A'] = expected['A'].astype('float64')
+        expected.iloc[:, 0] = expected.iloc[:, 0].astype('float64')
         inds = np.isnan(df.iloc[:, 0])
         mask = inds[inds].index
         df.iloc[mask, 0] = df.iloc[mask, 2]
         tm.assert_frame_equal(df, expected)
 
+        # GH 15686
+        # iloc with scalar, duplicated index and multiple blocks
+        df = df_orig.copy()
+        expected = df.fillna(15)
+        df.iloc[0, 0] = 15
+        tm.assert_frame_equal(df, expected)
+
+        # GH 15686
+        # iloc with repeated value, duplicated index and multiple blocks
+        df = df_orig.copy()
+        expected = concat([DataFrame([{'A': 15, 'B': 1}, {'A': 15, 'B': 2}]),
+                          df2], axis=1)
+        df.iloc[:, 0] = 15
+        tm.assert_frame_equal(df, expected)
+
         # del a dup column across blocks
         expected = DataFrame({0: [1, 2], 1: [3, 4]})
         expected.columns = ['B', 'B']

From b8ee051d5d77496688eb19f6673c900ec4cfedc4 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Tue, 14 Mar 2017 17:12:08 +0100
Subject: [PATCH 3/3] TST: added xfailing test on iloc with dups

---
 pandas/tests/indexing/test_iloc.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 64c12a472f71e..e7810b9035407 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -345,6 +345,20 @@ def test_iloc_setitem_dups(self):
             drop=True)
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.xfail(reason="BlockManager.setitem() broken")
+    def test_iloc_setitem_dups_slice(self):
+        # GH 12991
+        df = DataFrame(index=['a', 'b', 'c'],
+                       columns=['d', 'e', 'd']).fillna(0)
+        expected = df.iloc[:, 2].copy(deep=True)
+
+        first_col = df.iloc[:, 0]
+        first_col['a'] = 3
+
+        result = df.iloc[:, 2]
+
+        tm.assert_series_equal(result, expected)
+
     def test_iloc_getitem_frame(self):
         df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
                        columns=lrange(0, 8, 2))