From aacbaa8806ca37c932463a7276c8f86e85d7a67f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 25 Feb 2017 13:23:09 +0100 Subject: [PATCH 1/5] PERF: improve iloc list indexing --- pandas/core/indexing.py | 27 ++++++++++++++++----------- pandas/core/series.py | 4 ++-- pandas/indexes/base.py | 2 +- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c80e8c34aa88f..8bbb611ee71e2 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1697,26 +1697,24 @@ def _get_slice_axis(self, slice_obj, axis=0): else: return self.obj.take(slice_obj, axis=axis, convert=False) - def _get_list_axis(self, key_list, axis=0): + def _get_list_axis(self, key, axis=0): """ - Return Series values by list or array of integers + Return Series values by array of integers Parameters ---------- - key_list : list-like positional indexer + key : list-like positional indexer (already converted to array) axis : int (can only be zero) Returns ------- Series object """ - - # validate list bounds - self._is_valid_list_like(key_list, axis) - - # force an actual list - key_list = list(key_list) - return self.obj.take(key_list, axis=axis, convert=False) + try: + return self.obj.take(key, axis=axis, convert=False) + except IndexError: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") def _getitem_axis(self, key, axis=0): @@ -1724,7 +1722,13 @@ def _getitem_axis(self, key, axis=0): self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) - elif is_bool_indexer(key): + if isinstance(key, list): + try: + key = np.asarray(key) + except TypeError: # pragma: no cover + pass + + if is_bool_indexer(key): self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) @@ -1734,6 +1738,7 @@ def _getitem_axis(self, key, axis=0): # a single integer else: + key = self._convert_scalar_indexer(key, axis) if not is_integer(key): diff --git a/pandas/core/series.py b/pandas/core/series.py index bcc1ed272b081..02e06cb3c50ff 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2378,7 +2378,7 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): -------- numpy.ndarray.take """ - nv.validate_take(tuple(), kwargs) + #nv.validate_take(tuple(), kwargs) # check/convert indicies here if convert: @@ -2388,7 +2388,7 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): new_index = self.index.take(indices) new_values = self._values.take(indices) return self._constructor(new_values, - index=new_index).__finalize__(self) + index=new_index, fastpath=True).__finalize__(self) def isin(self, values): """ diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 7f0de963e5c56..4f2d0ba472617 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1668,7 +1668,7 @@ def _append_same_dtype(self, to_concat, name): @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): - nv.validate_take(tuple(), kwargs) + #nv.validate_take(tuple(), kwargs) indices = _ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, From 6d2705cd3694fbaf480bb6da53f378397c11a8c2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 30 Mar 2017 22:49:08 +0200 Subject: [PATCH 2/5] take method: only validate kwargs if there are kwargs --- pandas/core/series.py | 3 ++- pandas/indexes/base.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 02e06cb3c50ff..ce7b638d3deb5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2378,7 +2378,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): -------- numpy.ndarray.take """ - #nv.validate_take(tuple(), kwargs) + if kwargs: + nv.validate_take(tuple(), kwargs) # check/convert indicies here if convert: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 4f2d0ba472617..91e2422873dd4 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1668,7 +1668,8 @@ def _append_same_dtype(self, to_concat, name): @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): - #nv.validate_take(tuple(), kwargs) + if kwargs: + nv.validate_take(tuple(), kwargs) indices = _ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, From 3e537b6ba58a504d24f21e1d1f60f3a3e30954c6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 31 Mar 2017 09:24:16 +0200 Subject: [PATCH 3/5] small clean-up --- pandas/core/indexing.py | 5 ++--- pandas/core/series.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8bbb611ee71e2..61a847ccf1523 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1699,11 +1699,11 @@ def _get_slice_axis(self, slice_obj, axis=0): def _get_list_axis(self, key, axis=0): """ - Return Series values by array of integers + Return Series values by list or array of integers Parameters ---------- - key : list-like positional indexer (already converted to array) + key : list-like positional indexer axis : int (can only be zero) Returns @@ -1738,7 +1738,6 @@ def _getitem_axis(self, key, axis=0): # a single integer else: - key = self._convert_scalar_indexer(key, axis) if not is_integer(key): diff --git a/pandas/core/series.py b/pandas/core/series.py index ce7b638d3deb5..bcd58ea791083 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2388,8 +2388,8 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) - return self._constructor(new_values, - index=new_index, fastpath=True).__finalize__(self) + return (self._constructor(new_values, index=new_index, fastpath=True) + .__finalize__(self)) def isin(self, values): """ From 74d45ae4a6a920800fc8691c2f53a68499884fdc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 31 Mar 2017 23:23:00 +0200 Subject: [PATCH 4/5] add whatsnew --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 399f91fc60810..a34b9feb2b2fa 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -905,7 +905,7 @@ Performance Improvements - Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) - Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied function used the ``.name`` attribute of the group DataFrame (:issue:`15062`). - +- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`). .. _whatsnew_0200.bug_fixes: From bf54a0b7846a610132a78355eabebba8530a5211 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 31 Mar 2017 23:30:08 +0200 Subject: [PATCH 5/5] TST: edit test_take to preserve original dtype --- pandas/tests/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index a2329e2d1768e..0e8e8dc43ff03 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1870,7 +1870,7 @@ def test_take(self): tm.makeObjectSeries()]: out = s.take(indices) expected = Series(data=s.values.take(indices), - index=s.index.take(indices)) + index=s.index.take(indices), dtype=s.dtype) tm.assert_series_equal(out, expected) for df in [tm.makeTimeDataFrame()]: out = df.take(indices)