diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 90198fa48bcb4..7fc6f6d197dff 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -273,25 +273,6 @@ For getting fast access to a scalar (equiv to the prior method) df.iat[1,1] -There is one signficant departure from standard python/numpy slicing semantics. -python/numpy allow slicing past the end of an array without an associated -error. - -.. ipython:: python - - # these are allowed in python/numpy. - x = list('abcdef') - x[4:10] - x[8:10] - -Pandas will detect this and raise ``IndexError``, rather than return an empty -structure. - -:: - - >>> df.iloc[:,8:10] - IndexError: out-of-bounds on slice (end) - Boolean Indexing ~~~~~~~~~~~~~~~~ diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 0c4b57358d3d1..d65c1519fe869 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -77,8 +77,9 @@ of multi-axis indexing. See more at :ref:`Selection by Label ` - ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of - the axis), will raise ``IndexError`` when the requested indicies are out of - bounds. Allowed inputs are: + the axis), will raise ``IndexError`` if a single index is requested and it + is out-of-bounds, otherwise it will conform the bounds to the size of the object. + Allowed inputs are: - An integer e.g. ``5`` - A list or array of integers ``[4, 3, 0]`` @@ -420,12 +421,19 @@ python/numpy allow slicing past the end of an array without an associated error. x[4:10] x[8:10] -Pandas will detect this and raise ``IndexError``, rather than return an empty structure. +- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being + indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds + values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise + ``IndexError`` (:issue:`6296`). 
This could result in an empty axis (e.g. an empty DataFrame being returned) -:: + .. ipython:: python - >>> df.iloc[:,3:6] - IndexError: out-of-bounds on slice (end) + df = DataFrame(np.random.randn(5,2),columns=list('AB')) + df + df.iloc[[4,5,6]] + df.iloc[4:6] + df.iloc[:,2:3] + df.iloc[:,1:3] .. _indexing.basics.partial_setting: diff --git a/doc/source/release.rst b/doc/source/release.rst index 4d2130979392d..d3814ab324e92 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -56,6 +56,10 @@ New features API Changes ~~~~~~~~~~~ +- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being + indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds + values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise + ``IndexError`` (:issue:`6296`) Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index d044c254f9482..ee38fed810af0 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -15,6 +15,20 @@ Highlights include: API changes ~~~~~~~~~~~ +- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being + indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds + values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise + ``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned) + + .. 
ipython:: python + + df = DataFrame(np.random.randn(5,2),columns=list('AB')) + df + df.iloc[[4,5,6]] + df.iloc[4:6] + df.iloc[:,2:3] + df.iloc[:,1:3] + Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 68b35db3827c8..03e16f243836a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1756,10 +1756,6 @@ def head(self, n=5): l = len(self) if l == 0 or n==0: return self - if n > l: - n = l - elif n < -l: - n = -l return self.iloc[:n] def tail(self, n=5): @@ -1769,10 +1765,6 @@ def tail(self, n=5): l = len(self) if l == 0 or n == 0: return self - if n > l: - n = l - elif n < -l: - n = -l return self.iloc[-n:] #---------------------------------------------------------------------- diff --git a/pandas/core/index.py b/pandas/core/index.py index 3b58b27c7569f..5a02c0445c006 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -621,9 +621,15 @@ def __getitem__(self, key): if com._is_bool_indexer(key): key = np.asarray(key) - result = arr_idx[key] - if result.ndim > 1: - return result + try: + result = arr_idx[key] + if result.ndim > 1: + return result + except (IndexError): + if not len(key): + result = [] + else: + raise return Index(result, name=self.name) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d2f538decd576..029055d80b1af 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -73,6 +73,29 @@ def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) def _slice(self, obj, axis=0, raise_on_error=False, typ=None): + + # make out-of-bounds into bounds of the object + if typ == 'iloc': + ax = self.obj._get_axis(axis) + l = len(ax) + start = obj.start + stop = obj.stop + step = obj.step + if start is not None: + # degenerate to return nothing + if start >= l: + return self._getitem_axis(tuple(),axis=axis) + + # equiv to a null slice + elif start <= -l: + start = None + if stop is not None: + if stop > l: + stop 
= None + elif stop <= -l: + stop = None + obj = slice(start,stop,step) + return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error, typ=typ) @@ -1188,14 +1211,23 @@ def _getitem_tuple(self, tup): pass retval = self.obj + axis=0 for i, key in enumerate(tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') if _is_null_slice(key): + axis += 1 continue - retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + retval = getattr(retval, self.name)._getitem_axis(key, axis=axis) + + # if the dim was reduced, then pass a lower-dim the next time + if retval.ndim= l or arr.min() <= -l): + key = arr[(arr>-l) & (arr len(ax): + raise IndexError("single indexer is out-of-bounds") + return self._get_loc(key, axis=axis) def _convert_to_indexer(self, obj, axis=0, is_setter=False): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d0a8e1c06fd28..a7e1548b41bbb 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3246,7 +3246,7 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None, pandas-indexer with -1's only. 
""" # trying to reindex on an axis with duplicates - if not allow_dups and not self.axes[axis].is_unique: + if not allow_dups and not self.axes[axis].is_unique and len(indexer): raise ValueError("cannot reindex from a duplicate axis") if not self.is_consolidated(): diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 958ca81b0a2ee..6c7e455bb1c03 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -873,7 +873,7 @@ def test_equals(self): s2[0] = 9.9 self.assert_(not s1.equals(s2)) - + idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) s1 = Series([1, 2, np.nan], index=idx) s2 = s1.copy() @@ -900,17 +900,17 @@ def test_equals(self): # different dtype different = df1.copy() different['floats'] = different['floats'].astype('float32') - self.assert_(not df1.equals(different)) + self.assert_(not df1.equals(different)) # different index different_index = -index different = df2.set_index(different_index) - self.assert_(not df1.equals(different)) + self.assert_(not df1.equals(different)) # different columns different = df2.copy() different.columns = df2.columns[::-1] - self.assert_(not df1.equals(different)) + self.assert_(not df1.equals(different)) # DatetimeIndex index = pd.date_range('2000-1-1', periods=10, freq='T') diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 97cdc5ff349a1..52de461f0281b 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -339,6 +339,72 @@ def test_repeated_getitem_dups(self): result = df.loc[:,0].loc['A'] assert_series_equal(result,expected) + def test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE')) + expected = df + result = df.iloc[:,[0,1,2,3,4,5]] + assert_frame_equal(result,expected) + + result = df.iloc[[1,30]] + expected = df.iloc[[1]] + assert_frame_equal(result,expected) + + result = df.iloc[[1,-30]] 
+ expected = df.iloc[[1]] + assert_frame_equal(result,expected) + + result = df.iloc[:,4:10] + expected = df.iloc[:,4:] + assert_frame_equal(result,expected) + + result = df.iloc[:,-4:-10] + expected = df.iloc[:,-4:] + assert_frame_equal(result,expected) + + result = df.iloc[[100]] + expected = DataFrame(columns=df.columns) + assert_frame_equal(result,expected) + + # still raise on a single indexer + def f(): + df.iloc[30] + self.assertRaises(IndexError, f) + + s = df['A'] + result = s.iloc[[100]] + expected = Series() + assert_series_equal(result,expected) + + result = s.iloc[[-100]] + expected = Series() + assert_series_equal(result,expected) + + # slice + result = s.iloc[18:30] + expected = s.iloc[18:] + assert_series_equal(result,expected) + + # doc example + df = DataFrame(np.random.randn(5,2),columns=list('AB')) + result = df.iloc[[4,5,6]] + expected = df.iloc[[4]] + assert_frame_equal(result,expected) + + result = df.iloc[4:6] + expected = df.iloc[[4]] + assert_frame_equal(result,expected) + + result = df.iloc[:,2:3] + expected = DataFrame(index=df.index) + assert_frame_equal(result,expected) + + result = df.iloc[:,1:3] + expected = df.iloc[:,[1]] + assert_frame_equal(result,expected) + def test_iloc_getitem_int(self): # integer @@ -442,14 +508,6 @@ def test_iloc_getitem_multiindex(self): xp = df.xs('b',drop_level=False) assert_frame_equal(rs,xp) - def test_iloc_getitem_out_of_bounds(self): - - # out-of-bounds slice - self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(1,5,None)])) - self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)])) - self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(1,5,None)])) - self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(-5,3,None)])) - def test_iloc_setitem(self): df = self.frame_ints @@ -738,12 +796,6 @@ def test_iloc_getitem_frame(self): expected = df.ix[[2,4,6,8]] 
assert_frame_equal(result, expected) - # out-of-bounds slice - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(1,5,None)])) - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)])) - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(1,11,None)])) - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(-11,3,None)])) - # try with labelled frame df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD'))