From ebd9d031d5146ef722ffe3db301c1320ef86c9fe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 10:25:19 -0700 Subject: [PATCH 01/11] disable unused getitem --- pandas/core/generic.py | 2 +- pandas/tests/indexing/test_indexing.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9053edf2d1424..37f2399b38a89 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3251,7 +3251,7 @@ def get(self, key, default=None): return default def __getitem__(self, item): - return self._get_item_cache(item) + raise AbstractMethodError(self) def _get_item_cache(self, item): """Return the cached item, item represents a label indexer.""" diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 8d1971801cccc..e375bd459e66f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -8,6 +8,7 @@ import pytest from pandas.compat import PY36 +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype @@ -1168,7 +1169,7 @@ def test_extension_array_cross_section_converts(): @pytest.mark.parametrize( "idxr, error, error_message", [ - (lambda x: x, AttributeError, "'numpy.ndarray' object has no attribute 'get'"), + (lambda x: x, AbstractMethodError, None), ( lambda x: x.loc, AttributeError, From e0e039fddf7da4917c989c01f7d3173d48f793e6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 10:28:41 -0700 Subject: [PATCH 02/11] remove unhit branch --- pandas/core/generic.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 37f2399b38a89..2644d52354ee3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3346,11 +3346,8 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): if clear: self._clear_item_cache() - def _clear_item_cache(self, i=None): - if i is not None: - self._item_cache.pop(i, None) - else: - self._item_cache.clear() + def _clear_item_cache(self): + self._item_cache.clear() def _slice(self, slobj, axis=0, kind=None): """ From 330f8af33b792fc9d444ff3e545745f8235f758d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 11:28:06 -0700 Subject: [PATCH 03/11] move maybe_droplevels --- pandas/core/frame.py | 10 +++------- pandas/core/indexes/multi.py | 37 ++++++++++++++++++++++++++---------- pandas/core/indexing.py | 19 ------------------ 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a84f1ddd87a5..a9dcbe38ccaf1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -94,12 +94,9 @@ ) from pandas.core.indexes import base as ibase from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.multi import maybe_droplevels from pandas.core.indexes.period import PeriodIndex -from pandas.core.indexing import ( - check_bool_indexer, - convert_to_index_sliceable, - maybe_droplevels, -) +from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( arrays_to_mgr, @@ -2868,8 +2865,6 @@ def _ixs(self, i: int, axis: int = 0): if axis == 0: label = self.index[i] new_values = self._data.fast_xs(i) - if is_scalar(new_values): - return new_values # if we are a copy, mark as such copy = isinstance(new_values, np.ndarray) and new_values.base is None @@ -2980,6 +2975,7 @@ def _getitem_bool_array(self, key): return self.take(indexer, axis=0) def _getitem_multilevel(self, key): + # self.columns is a MultiIndex loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): new_columns = self.columns[loc] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0123230bc0f63..0e8e54444d51d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1470,9 +1470,6 @@ def dropna(self, how="any"): return self.copy(codes=new_codes, deep=True) def get_value(self, series, key): - # somewhat broken encapsulation - from pandas.core.indexing import maybe_droplevels - # Label-based s = com.values_from_object(series) k = com.values_from_object(key) @@ -2709,7 +2706,7 @@ def _maybe_to_slice(loc): return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop) - def get_loc_level(self, key, level=0, drop_level=True): + def get_loc_level(self, key, level=0, drop_level: bool = True): """ Get both the location for the requested label(s) and the resulting sliced index. @@ -2750,7 +2747,8 @@ def get_loc_level(self, key, level=0, drop_level=True): (1, None) """ - def maybe_droplevels(indexer, levels, drop_level): + # different name to distinguish from maybe_droplevels + def maybe_mi_droplevels(indexer, levels, drop_level: bool): if not drop_level: return self[indexer] # kludgearound @@ -2780,7 +2778,7 @@ def maybe_droplevels(indexer, levels, drop_level): result = loc if result is None else result & loc - return result, maybe_droplevels(result, level, drop_level) + return result, maybe_mi_droplevels(result, level, drop_level) level = self._get_level_number(level) @@ -2793,7 +2791,7 @@ def maybe_droplevels(indexer, levels, drop_level): try: if key in self.levels[0]: indexer = self._get_level_indexer(key, level=level) - new_index = maybe_droplevels(indexer, [0], drop_level) + new_index = maybe_mi_droplevels(indexer, [0], drop_level) return indexer, new_index except TypeError: pass @@ -2808,7 +2806,7 @@ def partial_selection(key, indexer=None): ilevels = [ i for i in range(len(key)) if key[i] != slice(None, None) ] - return indexer, maybe_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) if len(key) == self.nlevels and self.is_unique: # Complete key in unique index -> standard get_loc @@ -2843,10 +2841,10 @@ def partial_selection(key, indexer=None): if indexer is None: indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, maybe_droplevels(indexer, ilevels, drop_level) + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) else: indexer = self._get_level_indexer(key, level=level) - return indexer, maybe_droplevels(indexer, [level], drop_level) + return indexer, maybe_mi_droplevels(indexer, [level], drop_level) def _get_level_indexer(self, key, level=0, indexer=None): # return an indexer, boolean array or a slice showing where the key is @@ -3464,3 +3462,22 @@ def _sparsify(label_list, start=0, sentinel=""): def _get_na_rep(dtype): return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") + + +def maybe_droplevels(index, key): + # drop levels + original_index = index + if isinstance(key, tuple): + for _ in key: + try: + index = index.droplevel(0) + except ValueError: + # we have dropped too much, so back out + return original_index + else: + try: + index = index.droplevel(0) + except ValueError: + pass + + return index diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1d4ea54ef0d70..239feed9a31a9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2469,25 +2469,6 @@ def need_slice(obj): ) -def maybe_droplevels(index, key): - # drop levels - original_index = index - if isinstance(key, tuple): - for _ in key: - try: - index = index.droplevel(0) - except ValueError: - # we have dropped too much, so back out - return original_index - else: - try: - index = index.droplevel(0) - except ValueError: - pass - - return index - - def _non_reducing_slice(slice_): """ Ensurse that a slice doesn't reduce to a Series or Scalar. From ca16db20b581493ab995367e741aa3cb0e7ec1f4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 11:41:45 -0700 Subject: [PATCH 04/11] make tuplify non-method --- pandas/core/indexing.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 239feed9a31a9..34b269f9b9532 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): raise IndexError("{0} cannot enlarge its target object".format(self.name)) else: if not isinstance(indexer, tuple): - indexer = self._tuplify(indexer) + indexer = _tuplify(self.ndim, indexer) for ax, i in zip(self.obj.axes, indexer): if isinstance(i, slice): # should check the stop slice? @@ -401,7 +401,7 @@ def _setitem_with_indexer(self, indexer, value): assert info_axis == 1 if not isinstance(indexer, tuple): - indexer = self._tuplify(indexer) + indexer = _tuplify(self.ndim, indexer) if isinstance(value, ABCSeries): value = self._align_series(indexer, value) @@ -1274,11 +1274,6 @@ def _convert_to_indexer( return {"key": obj} raise - def _tuplify(self, loc): - tup = [slice(None, None) for _ in range(self.ndim)] - tup[0] = loc - return tuple(tup) - def _get_slice_axis(self, slice_obj: slice, axis: int): # caller is responsible for ensuring non-None axis obj = self.obj @@ -2152,6 +2147,8 @@ def _convert_to_indexer( ) +# Note: aside from the constructor, this and subclasses do not use any +# _NDFrameIndexer methods class _ScalarAccessIndexer(_NDFrameIndexer): """ access scalars quickly """ @@ -2178,7 +2175,7 @@ def __setitem__(self, key, value): key = com.apply_if_callable(key, self.obj) if not isinstance(key, tuple): - key = self._tuplify(key) + key = _tuplify(self.ndim, key) if len(key) != self.obj.ndim: raise ValueError("Not enough indexers for scalar access (setting)!") key = list(self._convert_key(key, is_setter=True)) @@ -2320,6 +2317,12 @@ def _convert_key(self, key, is_setter: bool = False): return key +def _tuplify(ndim: int, loc) -> tuple: + tup = [slice(None, None) for _ in range(ndim)] + tup[0] = loc + return tuple(tup) + + def convert_to_index_sliceable(obj, key): """ if we are index sliceable, then return my slicer, otherwise return None From 9dafe6040450aacedf42ca54f6f079b5642d1abc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 12:30:05 -0700 Subject: [PATCH 05/11] remove _has_valid_setitem_indexer --- pandas/core/indexing.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 34b269f9b9532..a45bb780b765b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2147,9 +2147,7 @@ def _convert_to_indexer( ) -# Note: aside from the constructor, this and subclasses do not use any -# _NDFrameIndexer methods -class _ScalarAccessIndexer(_NDFrameIndexer): +class _ScalarAccessIndexer(_NDFrameIndexerBase): """ access scalars quickly """ def _convert_key(self, key, is_setter: bool = False): @@ -2306,9 +2304,6 @@ class _iAtIndexer(_ScalarAccessIndexer): _takeable = True - def _has_valid_setitem_indexer(self, indexer): - self._has_valid_positional_setitem_indexer(indexer) - def _convert_key(self, key, is_setter: bool = False): """ require integer args (and convert to label arguments) """ for a, i in zip(self.obj.axes, key): From f21e7d54e982ef675703817a0c2393f5ab65fc0e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 12:46:27 -0700 Subject: [PATCH 06/11] self.obj.ndim->self.ndim --- pandas/core/indexing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a45bb780b765b..30aac3259c5ca 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -226,7 +226,7 @@ def _validate_key(self, key, axis: int): def _has_valid_tuple(self, key: Tuple): """ check the key for valid keys across my indexer """ for i, k in enumerate(key): - if i >= self.obj.ndim: + if i >= self.ndim: raise IndexingError("Too many indexers") try: self._validate_key(k, i) @@ -254,7 +254,7 @@ def _convert_tuple(self, key, is_setter: bool = False): keyidx.append(slice(None)) else: for i, k in enumerate(key): - if i >= self.obj.ndim: + if i >= self.ndim: raise IndexingError("Too many indexers") idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter) keyidx.append(idx) @@ -670,7 +670,7 @@ def ravel(i): aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 - is_frame = self.obj.ndim == 2 + is_frame = self.ndim == 2 obj = self.obj # are we a single alignable value on a non-primary @@ -731,7 +731,7 @@ def ravel(i): raise ValueError("Incompatible indexer with Series") def _align_frame(self, indexer, df: ABCDataFrame): - is_frame = self.obj.ndim == 2 + is_frame = self.ndim == 2 if isinstance(indexer, tuple): @@ -867,7 +867,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): except KeyError as ek: # raise KeyError if number of indexers match # else IndexingError will be raised - if len(tup) <= self.obj.index.nlevels and len(tup) > self.obj.ndim: + if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: raise ek except Exception as e1: if isinstance(tup[0], (slice, Index)): @@ -900,7 +900,7 @@ def _getitem_lowerdim(self, tup: Tuple): if result is not None: return result - if len(tup) > self.obj.ndim: + if len(tup) > self.ndim: raise IndexingError("Too many indexers. handle elsewhere") # to avoid wasted computation @@ -2174,7 +2174,7 @@ def __setitem__(self, key, value): if not isinstance(key, tuple): key = _tuplify(self.ndim, key) - if len(key) != self.obj.ndim: + if len(key) != self.ndim: raise ValueError("Not enough indexers for scalar access (setting)!") key = list(self._convert_key(key, is_setter=True)) key.append(value) From dd5fc2d79a7c5c12c867503bcc4afffc316ea8a6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 14:39:01 -0700 Subject: [PATCH 07/11] Comment --- pandas/core/indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 30aac3259c5ca..d648ae711c7cb 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2072,6 +2072,8 @@ def _getitem_tuple(self, tup: Tuple): # if the dim was reduced, then pass a lower-dim the next time if retval.ndim < self.ndim: + # TODO: this is never reached in tests; can we confirm that + # it is impossible? axis -= 1 # try to get for the next axis From 59e393568edd56f573f5efd368e085c50b5308f5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 14:49:21 -0700 Subject: [PATCH 08/11] remove case handled by set_engine above --- pandas/core/series.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 418b3fc8c57d0..2cebbf103d8fb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -47,7 +47,6 @@ ABCSparseSeries, ) from pandas.core.dtypes.missing import ( - is_valid_nat_for_dtype, isna, na_value_for_dtype, notna, @@ -1196,20 +1195,6 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - elif com.is_bool_indexer(key): - pass - elif is_timedelta64_dtype(self.dtype): - # reassign a null value to iNaT - if is_valid_nat_for_dtype(value, self.dtype): - # exclude np.datetime64("NaT") - value = iNaT - - try: - self.index._engine.set_value(self._values, key, value) - return - except (TypeError, ValueError): - # ValueError appears in only some builds in CI - pass self.loc[key] = value return From e165ecaa01b7f78535951fa1966a694cb89096ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 24 Jul 2019 15:58:03 -0700 Subject: [PATCH 09/11] remove unused import --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2cebbf103d8fb..848e64eaf3cc6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -12,7 +12,7 @@ from pandas._config import get_option -from pandas._libs import iNaT, index as libindex, lib, reshape, tslibs +from pandas._libs import index as libindex, lib, reshape, tslibs from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate From 5c4927e386a81dfbda1d1574d8828ead9bf37c2e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 25 Jul 2019 10:57:27 -0700 Subject: [PATCH 10/11] docstrings --- pandas/core/indexes/multi.py | 12 ++++++++++++ pandas/core/indexing.py | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0e8e54444d51d..a7c3449615299 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3465,6 +3465,18 @@ def _get_na_rep(dtype): def maybe_droplevels(index, key): + """ + Attempt to drop level or levels from the given index. + + Parameters + ---------- + index: Index + key : scalar or tuple + + Returns + ------- + Index + """ # drop levels original_index = index if isinstance(key, tuple): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d648ae711c7cb..cb33044c4e23f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2315,6 +2315,19 @@ def _convert_key(self, key, is_setter: bool = False): def _tuplify(ndim: int, loc) -> tuple: + """ + Given an indexer for the first dimension, create an equivalent tuple + for indexing over all dimensions. + + Parameters + ---------- + ndim : int + loc : object + + Returns + ------- + tuple + """ tup = [slice(None, None) for _ in range(ndim)] tup[0] = loc return tuple(tup) From f0559a17010318cbd1da18fce62cb78994663a9e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 25 Jul 2019 12:42:29 -0700 Subject: [PATCH 11/11] dummy commit to force ci