From 2aca38998c11a6db707b82a81f4252f1bb6a8125 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 12:22:19 +0000 Subject: [PATCH 01/24] Revert earlier change and use to_numpy Revert #24048 change that caused bug. --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ade05ab27093e..1730c2cc78e2a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4615,7 +4615,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self.loc(axis=axis)[mask] + result = self._take(mask.nonzero()[0], axis=axis) if inplace: self._update_inplace(result) From 2d12d2fbc4c906ce7c22121a4b18a649725cfab1 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 12:25:47 +0000 Subject: [PATCH 02/24] Remove warning by using to_numpy --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1730c2cc78e2a..daa6a6683e6b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4615,7 +4615,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self._take(mask.nonzero()[0], axis=axis) + result = self._take(mask.to_numpy()nonzero()[0], axis=axis) if inplace: self._update_inplace(result) From f3571011e1d68a9b0c4421b3eada050ce29cba65 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 12:26:17 +0000 Subject: [PATCH 03/24] Make warning go away --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index daa6a6683e6b9..4a930aab7fd93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4615,7 +4615,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self._take(mask.to_numpy()nonzero()[0], axis=axis) + result = self._take(mask.to_numpy().nonzero()[0], axis=axis) if inplace: self._update_inplace(result) From 7cc4c372c300ce168ccb2937453b9caa88fe3a2d Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 15:56:02 +0000 Subject: [PATCH 04/24] revert initial bugfix Write tests first --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4a930aab7fd93..9d3922edd9d6e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4615,7 +4615,8 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self._take(mask.to_numpy().nonzero()[0], axis=axis) + #result = self._take(mask.to_numpy().nonzero()[0], axis=axis) + result = self.loc(axis=axis)[mask] if inplace: self._update_inplace(result) From 8ec653a01a477fb9b9e72668ba44ca21e608e3dc Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 15:56:19 +0000 Subject: [PATCH 05/24] Add test for contains in interval index categorical When tested with a variable that has the wrong dtype, this raises an exception instead of False --- pandas/tests/indexing/test_categorical.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 317aac1766cf8..cdfb48fafbdb6 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -78,6 +78,12 @@ def test_getitem_scalar(self): result = s[cats[0]] assert result == expected + def test_contains_interval_range(self): + """Check we can use contains """ + intervals = pd.interval_range(0.0, 1.0) + cats = pd.Categorical(intervals) + assert 'gg' not in cats + def test_slicing_directly(self): cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) sliced = cat[3] From 878802e7b7c8930711af5729ddd6479711d729c6 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 16:07:35 +0000 Subject: [PATCH 06/24] Check get_loc on interval index raises KeyError When supplied a variable with the wrong type get_loc should raise a KeyError (not type error). Otherwise things like checking if a variable is in an index will fail. --- pandas/tests/indexing/test_loc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3bf4a6bee4af9..6e7906eede639 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -778,3 +778,9 @@ def test_loc_setitem_empty_append_raises(self): msg = "cannot copy sequence with size 2 to array axis with dimension 0" with pytest.raises(ValueError, match=msg): df.loc[0:2, 'x'] = data + + def test_loc_getitem_interval_index(self): + """ GH25087, test get_loc returns key error for interval indexes""" + idx = pd.interval_range(0, 1.0) + with pytest.raises(KeyError): + idx.get_loc('gg') From 4dabe0e45ad5124e459a8d0c04e02fbcfcc6bcfa Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 16:18:00 +0000 Subject: [PATCH 07/24] Add test for get_indexer --- pandas/tests/indexes/interval/test_interval.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index f1fd06c9cab6e..60d3a473138cc 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -886,6 +886,12 @@ def test_symmetric_difference(self, closed, sort): result = index.symmetric_difference(other, sort=sort) tm.assert_index_equal(result, expected) + def test_get_indexer_errors(self): + # not sure about this one + index = pd.interval_range(0, 1) + with pytest.raises(KeyError): + index.get_indexer(['gg']) + @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) @pytest.mark.parametrize("sort", [None, False]) From 564d88dadca3113a3e6896d414c7687d81c76874 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 16:20:34 +0000 Subject: [PATCH 08/24] Add a test for get_indexer with different type --- pandas/tests/indexes/interval/test_interval.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 60d3a473138cc..8e680d92da670 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -886,11 +886,12 @@ def test_symmetric_difference(self, closed, sort): result = index.symmetric_difference(other, sort=sort) tm.assert_index_equal(result, expected) - def test_get_indexer_errors(self): + def test_interval_range_get_indexer_with_different_input_type(self): # not sure about this one index = pd.interval_range(0, 1) - with pytest.raises(KeyError): - index.get_indexer(['gg']) + # behaviour should be the same as Int64Index and return an + # array with values of -1 + assert np.all(index.get_indexer(['gg']) == np.array([-1])) @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) From f4c43e30d8d0ad65ead1078c82041ed75a0ac9ed Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 16:30:50 +0000 Subject: [PATCH 09/24] Make first two tests pass --- pandas/core/indexes/interval.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2c63fe33c57fe..507f183c0d73a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -766,8 +766,13 @@ def get_loc(self, key, method=None): key = Interval(left, right, key.closed) else: key = self._maybe_cast_slice_bound(key, 'left', None) - - start, stop = self._find_non_overlapping_monotonic_bounds(key) + try: + start, stop = self._find_non_overlapping_monotonic_bounds(key) + except TypeError: + # get loc should raise KeyError + # if key is hashable but + # of an incorrect type + raise KeyError if start is None or stop is None: return slice(start, stop) From a09a07e4dabe91a8cfe46bec40414a2ddc4225a3 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Feb 2019 16:30:58 +0000 Subject: [PATCH 10/24] Make third test pass This is enough for making the test pass but it's not the right implementation --- pandas/core/indexes/interval.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 507f183c0d73a..b06358910724c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -824,7 +824,14 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return np.arange(len(self), dtype='intp') if self.is_non_overlapping_monotonic: - start, stop = self._find_non_overlapping_monotonic_bounds(target) + try: + start, stop = ( + self._find_non_overlapping_monotonic_bounds() + ) + except TypeError: + # This is probably wrong + # but not sure what I should do here + return np.array([-1]) start_plus_one = start + 1 if not ((start_plus_one < stop).any()): From 6c887e6b03e5a632dcbf50fd69a149a13773243e Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 3 Feb 2019 18:46:00 +0000 Subject: [PATCH 11/24] remove commented out code --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d3922edd9d6e..ade05ab27093e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4615,7 +4615,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - #result = self._take(mask.to_numpy().nonzero()[0], axis=axis) result = self.loc(axis=axis)[mask] if inplace: From 0a143f261e41fc6fb2c7dd69fb7af745b543746e Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 3 Feb 2019 18:49:12 +0000 Subject: [PATCH 12/24] Improve error message --- pandas/core/indexes/interval.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b06358910724c..1efbfb204c6d9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -769,10 +769,8 @@ def get_loc(self, key, method=None): try: start, stop = self._find_non_overlapping_monotonic_bounds(key) except TypeError: - # get loc should raise KeyError - # if key is hashable but - # of an incorrect type - raise KeyError + # get_loc should raise KeyError + raise KeyError('key is hashable but of incorrect type') if start is None or stop is None: return slice(start, stop) From 0730cd613f87cf98b301efac335188e3d7f93d02 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 3 Feb 2019 20:50:53 +0000 Subject: [PATCH 13/24] Rename, move and parametrize indexer test Include some non-monotonic/overlapping IntervalIndex. This triggers another bug, due to the fact that self.get_loc(i) is called on an unexpected key. --- pandas/core/indexes/interval.py | 16 +++++++++++++--- pandas/tests/indexes/interval/test_interval.py | 14 +++++++------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1efbfb204c6d9..d6f026667a643 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -788,7 +788,10 @@ def get_loc(self, key, method=None): left, right = _get_interval_closed_bounds(key) return self._engine.get_loc_interval(left, right) else: - return self._engine.get_loc(key) + try: + return self._engine.get_loc(key) + except TypeError: + raise KeyError('No engine for key {!r}'.format(key)) def get_value(self, series, key): if com.is_bool_indexer(key): @@ -829,7 +832,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): except TypeError: # This is probably wrong # but not sure what I should do here - return np.array([-1]) + #return np.array([-1]) + return np.repeat(np.int(-1), len(target)) start_plus_one = start + 1 if not ((start_plus_one < stop).any()): @@ -844,7 +848,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): # non IntervalIndex else: - indexer = np.concatenate([self.get_loc(i) for i in target]) + vals = [] + for i in target: + try: + vals.append(self.get_loc(i)) + except KeyError: + vals.append(np.array([-1])) + indexer = np.concatenate(vals) return ensure_platform_int(indexer) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 8e680d92da670..0ae519e5727ab 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -616,6 +616,13 @@ def test_get_indexer_length_one(self, item, closed): expected = np.array([0] * len(item), dtype='intp') tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize('index', [ + pd.interval_range(0, 1), + pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]) + ]) + def test_get_indexer_errors(self, index): + assert np.all(index.get_indexer(['gg']) == np.array([-1])) + # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize('arrays', [ (date_range('20180101', periods=4), date_range('20180103', periods=4)), @@ -886,13 +893,6 @@ def test_symmetric_difference(self, closed, sort): result = index.symmetric_difference(other, sort=sort) tm.assert_index_equal(result, expected) - def test_interval_range_get_indexer_with_different_input_type(self): - # not sure about this one - index = pd.interval_range(0, 1) - # behaviour should be the same as Int64Index and return an - # array with values of -1 - assert np.all(index.get_indexer(['gg']) == np.array([-1])) - @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) @pytest.mark.parametrize("sort", [None, False]) From 246eb57e00f98b3fb199b9b0a5f1753d2ede420a Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 3 Feb 2019 20:58:46 +0000 Subject: [PATCH 14/24] Use numpy_array_equal in indexer test --- pandas/core/indexes/interval.py | 3 --- pandas/tests/indexes/interval/test_interval.py | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index d6f026667a643..721ef0c7f06f5 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -830,9 +830,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): self._find_non_overlapping_monotonic_bounds() ) except TypeError: - # This is probably wrong - # but not sure what I should do here - #return np.array([-1]) return np.repeat(np.int(-1), len(target)) start_plus_one = start + 1 diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 0ae519e5727ab..e8855f4dd6a73 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -621,7 +621,8 @@ def test_get_indexer_length_one(self, item, closed): pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]) ]) def test_get_indexer_errors(self, index): - assert np.all(index.get_indexer(['gg']) == np.array([-1])) + expected = np.array([-1], dtype='intp') + assert tm.assert_numpy_array_equal(index.get_indexer(['gg']), expected) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize('arrays', [ From 93f75ea4a36a9fabc610af7f35634f82a3e3e6e9 Mon Sep 17 00:00:00 2001 From: Samuel Date: Mon, 4 Feb 2019 06:51:40 +0000 Subject: [PATCH 15/24] Refactor interval index get_loc test Move the test from indexing/test_loc to index/interval/test_interval. --- pandas/tests/indexes/interval/test_interval.py | 8 ++++++++ pandas/tests/indexing/test_loc.py | 6 ------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index e8855f4dd6a73..fcbb04d2b71a4 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -435,6 +435,14 @@ def test_get_loc_value(self): idx = IntervalIndex.from_arrays([0, 2], [1, 3]) pytest.raises(KeyError, idx.get_loc, 1.5) + # GH25087, test get_loc returns key error for interval indexes + msg = 'key is hashable but of incorrect type' + with pytest.raises(KeyError, match=msg): + idx.get_loc('a') + idx = pd.interval_range(0, 1.0) + with pytest.raises(KeyError, match=msg): + idx.get_loc('a') + # To be removed, replaced by test_interval_new.py (see #16316, #16386) def slice_locs_cases(self, breaks): # TODO: same tests for more index types diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6e7906eede639..3bf4a6bee4af9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -778,9 +778,3 @@ def test_loc_setitem_empty_append_raises(self): msg = "cannot copy sequence with size 2 to array axis with dimension 0" with pytest.raises(ValueError, match=msg): df.loc[0:2, 'x'] = data - - def test_loc_getitem_interval_index(self): - """ GH25087, test get_loc returns key error for interval indexes""" - idx = pd.interval_range(0, 1.0) - with pytest.raises(KeyError): - idx.get_loc('gg') From 268db81d6ccfbb82201e071b8b5ebdc5a3abf647 Mon Sep 17 00:00:00 2001 From: Samuel Date: Mon, 4 Feb 2019 07:31:58 +0000 Subject: [PATCH 16/24] Fix bug introduced in earlier commit target was missing from call to _find_non_overlapping_monotonic_bounds --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 721ef0c7f06f5..f3c6dbc931a5d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -827,7 +827,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if self.is_non_overlapping_monotonic: try: start, stop = ( - self._find_non_overlapping_monotonic_bounds() + self._find_non_overlapping_monotonic_bounds(target) ) except TypeError: return np.repeat(np.int(-1), len(target)) From a5aa1e8c336dcf605392454309c7ca43c3b09b98 Mon Sep 17 00:00:00 2001 From: Sam Sinayoko Date: Wed, 6 Feb 2019 07:51:59 +0000 Subject: [PATCH 17/24] Add reminder comment to use raise from for python 3 --- pandas/core/indexes/interval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f3c6dbc931a5d..478566f6589b6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -770,6 +770,7 @@ def get_loc(self, key, method=None): start, stop = self._find_non_overlapping_monotonic_bounds(key) except TypeError: # get_loc should raise KeyError + # TODO(py3): use raise from. raise KeyError('key is hashable but of incorrect type') if start is None or stop is None: From d480872281d53e8c7ae665379bee5c69bc19e901 Mon Sep 17 00:00:00 2001 From: Sam Sinayoko Date: Wed, 6 Feb 2019 07:53:57 +0000 Subject: [PATCH 18/24] Include key in error message. --- pandas/core/indexes/interval.py | 7 +++++-- pandas/tests/indexes/interval/test_interval.py | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 478566f6589b6..c2f4b9adbe2f3 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -771,7 +771,8 @@ def get_loc(self, key, method=None): except TypeError: # get_loc should raise KeyError # TODO(py3): use raise from. - raise KeyError('key is hashable but of incorrect type') + raise KeyError('Key {!r} is hashable but of incorrect type.' + .format(key)) if start is None or stop is None: return slice(start, stop) @@ -792,7 +793,9 @@ def get_loc(self, key, method=None): try: return self._engine.get_loc(key) except TypeError: - raise KeyError('No engine for key {!r}'.format(key)) + msg = ('Key {!r} not found (does match index type {}).' + .format(key, self.dtype)) + raise KeyError(msg) def get_value(self, series, key): if com.is_bool_indexer(key): diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index fcbb04d2b71a4..7e4c453f67866 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -436,9 +436,10 @@ def test_get_loc_value(self): pytest.raises(KeyError, idx.get_loc, 1.5) # GH25087, test get_loc returns key error for interval indexes - msg = 'key is hashable but of incorrect type' + key = 'a' + msg = 'Key {!r} is hashable but of incorrect type'.format(key) with pytest.raises(KeyError, match=msg): - idx.get_loc('a') + idx.get_loc(key) idx = pd.interval_range(0, 1.0) with pytest.raises(KeyError, match=msg): idx.get_loc('a') From 6ed1080df9240643d4479981a9e0a95423aad484 Mon Sep 17 00:00:00 2001 From: Sam Sinayoko Date: Wed, 6 Feb 2019 08:46:43 +0000 Subject: [PATCH 19/24] Add larger interval range to test --- pandas/tests/indexes/interval/test_interval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 7e4c453f67866..bd483d33960de 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -627,6 +627,7 @@ def test_get_indexer_length_one(self, item, closed): @pytest.mark.parametrize('index', [ pd.interval_range(0, 1), + pd.interval_range(0, 3), pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]) ]) def test_get_indexer_errors(self, index): From 120e2bc32ce32b7239c2a7e5b8d34295d704bab9 Mon Sep 17 00:00:00 2001 From: Sam Sinayoko Date: Wed, 6 Feb 2019 08:51:53 +0000 Subject: [PATCH 20/24] get_loc should raise KeyError if the supplied key has the wrong type --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c2f4b9adbe2f3..b4509c80ed8bf 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -809,7 +809,7 @@ def get_value(self, series, key): try: loc = self.get_loc(key) - except TypeError: + except KeyError: # we didn't find exact intervals or are non-unique msg = "unable to slice with this key: {key}".format(key=key) raise ValueError(msg) From 2c4827236983c4c4755661bede38daa40d3f2067 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 10 Feb 2019 17:41:18 +0000 Subject: [PATCH 21/24] Only return -1 in get_indexer for incorrect values Instead of returning [-1, -1, -1] when the middle value is incorrect type, return [a, -1, b]. --- pandas/core/indexes/interval.py | 13 ++++++------- pandas/tests/indexes/interval/test_interval.py | 8 ++++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b4509c80ed8bf..436172f1f53fe 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -833,12 +833,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): start, stop = ( self._find_non_overlapping_monotonic_bounds(target) ) + start_plus_one = start + 1 + if not ((start_plus_one < stop).any()): + return np.where(start_plus_one == stop, start, -1) except TypeError: - return np.repeat(np.int(-1), len(target)) - - start_plus_one = start + 1 - if not ((start_plus_one < stop).any()): - return np.where(start_plus_one == stop, start, -1) + pass if not self.is_unique: raise ValueError("cannot handle non-unique indices") @@ -854,8 +853,8 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): try: vals.append(self.get_loc(i)) except KeyError: - vals.append(np.array([-1])) - indexer = np.concatenate(vals) + vals.append(-1) + indexer = np.array(vals) return ensure_platform_int(indexer) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index bd483d33960de..32b0dd708776d 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -591,6 +591,14 @@ def test_get_indexer(self): expected = np.array([-1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) + actual = self.index.get_indexer(['a', 1]) + expected = np.array([-1, 0], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index.get_indexer(['a', 1, 'b']) + expected = np.array([-1, 0, -1], dtype='intp') + tm.assert_numpy_array_equal(actual, expected) + # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer_subintervals(self): From 02127ffc4df205ca8076119a2537f8372f0a1eca Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 10 Feb 2019 17:57:01 +0000 Subject: [PATCH 22/24] Better tests for get_indexer_errors Add mix of invalid and valid values --- .../tests/indexes/interval/test_interval.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 32b0dd708776d..3759a2ec0ba6d 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -633,14 +633,23 @@ def test_get_indexer_length_one(self, item, closed): expected = np.array([0] * len(item), dtype='intp') tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize('index', [ - pd.interval_range(0, 1), - pd.interval_range(0, 3), - pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]) + @pytest.mark.parametrize('index,value,expected_index', [ + (pd.interval_range(0, 1), 0.5, 0), + (pd.interval_range(0, 3), 0.5, 0), + (pd.IntervalIndex.from_tuples([(1, 3), (2, 4), (0, 2)]), 0.5, 2) ]) - def test_get_indexer_errors(self, index): + def test_get_indexer_errors(self, index, value, expected_index): + actual = index.get_indexer(['a']) expected = np.array([-1], dtype='intp') - assert tm.assert_numpy_array_equal(index.get_indexer(['gg']), expected) + assert tm.assert_numpy_array_equal(actual, expected) + + actual = index.get_indexer(['a', 'b']) + expected = np.array([-1, -1], dtype='intp') + assert tm.assert_numpy_array_equal(actual, expected) + + actual = index.get_indexer(['a', value, 'b']) + expected = np.array([-1, expected_index, -1], dtype='intp') + assert tm.assert_numpy_array_equal(actual, expected) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize('arrays', [ From ad13d9e121cb90f49890d54f7c58f18b6f9ea10c Mon Sep 17 00:00:00 2001 From: Sam Sinayoko Date: Mon, 11 Feb 2019 08:58:58 +0000 Subject: [PATCH 23/24] Fix broken test in test_interval Fixes test_with_overlaps test --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 436172f1f53fe..2dcf8a12cfb74 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -854,7 +854,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): vals.append(self.get_loc(i)) except KeyError: vals.append(-1) - indexer = np.array(vals) + indexer = np.array(vals).flatten() return ensure_platform_int(indexer) From 0ff356cd374752fbb6b309a3fce515c21c3e747b Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 16 Feb 2019 16:29:02 +0000 Subject: [PATCH 24/24] Fix broken tests in test_concat interval.get_indexer() should still raise a TypeError in cases where the types are unorderable. This is needed for DataFrame.append for example, which was breaking tests in test_concat. --- pandas/core/indexes/interval.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2dcf8a12cfb74..c2930d985fb53 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -836,8 +836,14 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): start_plus_one = start + 1 if not ((start_plus_one < stop).any()): return np.where(start_plus_one == stop, start, -1) - except TypeError: - pass + except TypeError as err: + # Only raise a type error when the types are not + # orderable, such as when the caller is combining + # an interval index with an integer index. + # (see test_append_different_columns_types_raises + # in pandas/tests/reshape/test_concat.py for more examples). + if err.args and 'unorderable types:' in err.args[0]: + raise if not self.is_unique: raise ValueError("cannot handle non-unique indices")