Skip to content

Commit 4b523f4

Browse files
committed
API: warning to raise KeyError in the future if not all elements of a list are selected via .loc
closes #15747
1 parent 870b6a6 commit 4b523f4

File tree

12 files changed

+169
-56
lines changed

12 files changed

+169
-56
lines changed

pandas/core/indexing.py

+25-5
Original file line numberDiff line numberDiff line change
@@ -1416,12 +1416,32 @@ def _has_valid_type(self, key, axis):
14161416
if isinstance(key, tuple) and isinstance(ax, MultiIndex):
14171417
return True
14181418

1419-
# TODO: don't check the entire key unless necessary
1420-
if (not is_iterator(key) and len(key) and
1421-
np.all(ax.get_indexer_for(key) < 0)):
1419+
if not is_iterator(key) and len(key):
14221420

1423-
raise KeyError("None of [%s] are in the [%s]" %
1424-
(key, self.obj._get_axis_name(axis)))
1421+
# True indicates missing values
1422+
missing = ax.get_indexer_for(key) < 0
1423+
1424+
if np.any(missing):
1425+
if len(key) == 1 or np.all(missing):
1426+
raise KeyError("None of [%s] are in the [%s]" %
1427+
(key, self.obj._get_axis_name(axis)))
1428+
1429+
else:
1430+
1431+
# we skip the warning on Categorical/Interval
1432+
# as this check is actually done (check for
1433+
# non-missing values), but a bit later in the
1434+
# code, so we want to avoid warning & then
1435+
# just raising
1436+
if not (ax.is_categorical() or ax.is_interval()):
1437+
warnings.warn(
1438+
"passing list-likes to .loc with "
1439+
"any non-matching elements will raise\n"
1440+
"KeyError in the future, "
1441+
"you can use .reindex() as an alternative",
1442+
FutureWarning, stacklevel=5)
1443+
1444+
return True
14251445

14261446
return True
14271447

pandas/io/formats/excel.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,15 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
353353
self.styler = None
354354
self.df = df
355355
if cols is not None:
356-
self.df = df.loc[:, cols]
356+
357+
# all missing, raise
358+
if not len(Index(cols) & df.columns):
359+
raise KeyError
360+
361+
# 1 missing is ok
362+
# TODO(jreback) this should raise
363+
# on *any* missing columns
364+
self.df = df.reindex(columns=cols)
357365
self.columns = self.df.columns
358366
self.float_format = float_format
359367
self.index = index

pandas/tests/indexing/test_categorical.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ def test_loc_listlike(self):
111111
assert_frame_equal(result, expected, check_index_type=True)
112112

113113
# not all labels in the categories
114-
pytest.raises(KeyError, lambda: self.df2.loc[['a', 'd']])
114+
with pytest.raises(KeyError):
115+
self.df2.loc[['a', 'd']]
115116

116117
def test_loc_listlike_dtypes(self):
117118
# GH 11586

pandas/tests/indexing/test_datetime.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def test_series_partial_set_datetime(self):
223223
Timestamp('2011-01-03')]
224224
exp = Series([np.nan, 0.2, np.nan],
225225
index=pd.DatetimeIndex(keys, name='idx'), name='s')
226-
tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
226+
tm.assert_series_equal(ser.reindex(keys), exp, check_index_type=True)
227227

228228
def test_series_partial_set_period(self):
229229
# GH 11497
@@ -248,5 +248,5 @@ def test_series_partial_set_period(self):
248248
pd.Period('2011-01-03', freq='D')]
249249
exp = Series([np.nan, 0.2, np.nan],
250250
index=pd.PeriodIndex(keys, name='idx'), name='s')
251-
result = ser.loc[keys]
251+
result = ser.reindex(keys)
252252
tm.assert_series_equal(result, exp)

pandas/tests/indexing/test_iloc.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,8 @@ def test_iloc_non_unique_indexing(self):
588588
expected = DataFrame(new_list)
589589
expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()])
590590
])
591-
result = df2.loc[idx]
591+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
592+
result = df2.loc[idx]
592593
tm.assert_frame_equal(result, expected, check_index_type=False)
593594

594595
def test_iloc_empty_list_indexer_is_ok(self):

pandas/tests/indexing/test_indexing.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,8 @@ def test_dups_fancy_indexing(self):
176176
'test1': [7., 6, np.nan],
177177
'other': ['d', 'c', np.nan]}, index=rows)
178178

179-
result = df.loc[rows]
179+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
180+
result = df.loc[rows]
180181
tm.assert_frame_equal(result, expected)
181182

182183
# see GH5553, make sure we use the right indexer
@@ -186,7 +187,8 @@ def test_dups_fancy_indexing(self):
186187
'other': [np.nan, np.nan, np.nan,
187188
'd', 'c', np.nan]},
188189
index=rows)
189-
result = df.loc[rows]
190+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
191+
result = df.loc[rows]
190192
tm.assert_frame_equal(result, expected)
191193

192194
# inconsistent returns for unique/duplicate indices when values are
@@ -203,20 +205,23 @@ def test_dups_fancy_indexing(self):
203205

204206
# GH 4619; duplicate indexer with missing label
205207
df = DataFrame({"A": [0, 1, 2]})
206-
result = df.loc[[0, 8, 0]]
208+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
209+
result = df.loc[[0, 8, 0]]
207210
expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
208211
tm.assert_frame_equal(result, expected, check_index_type=False)
209212

210213
df = DataFrame({"A": list('abc')})
211-
result = df.loc[[0, 8, 0]]
214+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
215+
result = df.loc[[0, 8, 0]]
212216
expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0])
213217
tm.assert_frame_equal(result, expected, check_index_type=False)
214218

215219
# non unique with non unique selector
216220
df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C'])
217221
expected = DataFrame(
218222
{'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E'])
219-
result = df.loc[['A', 'A', 'E']]
223+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
224+
result = df.loc[['A', 'A', 'E']]
220225
tm.assert_frame_equal(result, expected)
221226

222227
# GH 5835
@@ -227,7 +232,8 @@ def test_dups_fancy_indexing(self):
227232
expected = pd.concat(
228233
[df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'],
229234
index=df.index)], axis=1)
230-
result = df.loc[:, ['A', 'B', 'C']]
235+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
236+
result = df.loc[:, ['A', 'B', 'C']]
231237
tm.assert_frame_equal(result, expected)
232238

233239
# GH 6504, multi-axis indexing

pandas/tests/indexing/test_loc.py

+25-5
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,15 @@ def test_loc_getitem_label_list(self):
152152
[Timestamp('20130102'), Timestamp('20130103')],
153153
typs=['ts'], axes=0)
154154

155+
def test_loc_getitem_label_list_with_missing(self):
155156
self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
156157
typs=['empty'], fails=KeyError)
157-
self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3],
158-
typs=['ints', 'uints'], axes=0, fails=KeyError)
159-
self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7],
160-
typs=['ints', 'uints'], axes=1, fails=KeyError)
158+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
159+
self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3],
160+
typs=['ints', 'uints'], axes=0, fails=KeyError)
161+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
162+
self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7],
163+
typs=['ints', 'uints'], axes=1, fails=KeyError)
161164
self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10],
162165
typs=['ints', 'uints'], axes=2, fails=KeyError)
163166

@@ -249,7 +252,7 @@ def test_loc_to_fail(self):
249252
pytest.raises(KeyError, lambda: s.loc[['4']])
250253

251254
s.loc[-1] = 3
252-
result = s.loc[[-1, -2]]
255+
result = s.reindex([-1, -2])
253256
expected = Series([3, np.nan], index=[-1, -2])
254257
tm.assert_series_equal(result, expected)
255258

@@ -277,6 +280,23 @@ def f():
277280

278281
pytest.raises(KeyError, f)
279282

283+
def test_loc_getitem_list_with_fail(self):
284+
# 15747
285+
# should raise KeyError if *any* labels are missing
286+
287+
s = Series([1, 2, 3])
288+
289+
s.loc[[2]]
290+
291+
with pytest.raises(KeyError):
292+
s.loc[[3]]
293+
294+
# a non-match and a match
295+
with tm.assert_produces_warning(FutureWarning):
296+
expected = s.loc[[2, 3]]
297+
result = s.reindex([2, 3])
298+
tm.assert_series_equal(result, expected)
299+
280300
def test_loc_getitem_label_slice(self):
281301

282302
# label slices (with ints)

pandas/tests/indexing/test_partial.py

+79-27
Original file line numberDiff line numberDiff line change
@@ -222,52 +222,93 @@ def test_series_partial_set(self):
222222
# Regression from GH4825
223223
ser = Series([0.1, 0.2], index=[1, 2])
224224

225-
# loc
225+
# loc equiv to .reindex
226226
expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
227-
result = ser.loc[[3, 2, 3]]
227+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
228+
result = ser.loc[[3, 2, 3]]
229+
tm.assert_series_equal(result, expected, check_index_type=True)
230+
231+
result = ser.reindex([3, 2, 3])
228232
tm.assert_series_equal(result, expected, check_index_type=True)
229233

230234
expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x'])
231-
result = ser.loc[[3, 2, 3, 'x']]
235+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
236+
result = ser.loc[[3, 2, 3, 'x']]
237+
tm.assert_series_equal(result, expected, check_index_type=True)
238+
239+
result = ser.reindex([3, 2, 3, 'x'])
232240
tm.assert_series_equal(result, expected, check_index_type=True)
233241

234242
expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
235243
result = ser.loc[[2, 2, 1]]
236244
tm.assert_series_equal(result, expected, check_index_type=True)
237245

238246
expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1])
239-
result = ser.loc[[2, 2, 'x', 1]]
247+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
248+
result = ser.loc[[2, 2, 'x', 1]]
249+
tm.assert_series_equal(result, expected, check_index_type=True)
250+
251+
result = ser.reindex([2, 2, 'x', 1])
240252
tm.assert_series_equal(result, expected, check_index_type=True)
241253

242254
# raises as nothing is in the index
243255
pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])
244256

245257
expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
246-
result = ser.loc[[2, 2, 3]]
258+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
259+
result = ser.loc[[2, 2, 3]]
247260
tm.assert_series_equal(result, expected, check_index_type=True)
248261

262+
result = ser.reindex([2, 2, 3])
263+
tm.assert_series_equal(result, expected, check_index_type=True)
264+
265+
s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
249266
expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
250-
result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]]
267+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
268+
result = s.loc[[3, 4, 4]]
251269
tm.assert_series_equal(result, expected, check_index_type=True)
252270

271+
result = s.reindex([3, 4, 4])
272+
tm.assert_series_equal(result, expected, check_index_type=True)
273+
274+
s = Series([0.1, 0.2, 0.3, 0.4],
275+
index=[1, 2, 3, 4])
253276
expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
254-
result = Series([0.1, 0.2, 0.3, 0.4],
255-
index=[1, 2, 3, 4]).loc[[5, 3, 3]]
277+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
278+
result = s.loc[[5, 3, 3]]
279+
tm.assert_series_equal(result, expected, check_index_type=True)
280+
281+
result = s.reindex([5, 3, 3])
256282
tm.assert_series_equal(result, expected, check_index_type=True)
257283

284+
s = Series([0.1, 0.2, 0.3, 0.4],
285+
index=[1, 2, 3, 4])
258286
expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
259-
result = Series([0.1, 0.2, 0.3, 0.4],
260-
index=[1, 2, 3, 4]).loc[[5, 4, 4]]
287+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
288+
result = s.loc[[5, 4, 4]]
289+
tm.assert_series_equal(result, expected, check_index_type=True)
290+
291+
result = s.reindex([5, 4, 4])
261292
tm.assert_series_equal(result, expected, check_index_type=True)
262293

294+
s = Series([0.1, 0.2, 0.3, 0.4],
295+
index=[4, 5, 6, 7])
263296
expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
264-
result = Series([0.1, 0.2, 0.3, 0.4],
265-
index=[4, 5, 6, 7]).loc[[7, 2, 2]]
297+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
298+
result = s.loc[[7, 2, 2]]
266299
tm.assert_series_equal(result, expected, check_index_type=True)
267300

301+
result = s.reindex([7, 2, 2])
302+
tm.assert_series_equal(result, expected, check_index_type=True)
303+
304+
s = Series([0.1, 0.2, 0.3, 0.4],
305+
index=[1, 2, 3, 4])
268306
expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
269-
result = Series([0.1, 0.2, 0.3, 0.4],
270-
index=[1, 2, 3, 4]).loc[[4, 5, 5]]
307+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
308+
result = s.loc[[4, 5, 5]]
309+
tm.assert_series_equal(result, expected, check_index_type=True)
310+
311+
result = s.reindex([4, 5, 5])
271312
tm.assert_series_equal(result, expected, check_index_type=True)
272313

273314
# iloc
@@ -284,13 +325,15 @@ def test_series_partial_set_with_name(self):
284325
# loc
285326
exp_idx = Index([3, 2, 3], dtype='int64', name='idx')
286327
expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s')
287-
result = ser.loc[[3, 2, 3]]
328+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
329+
result = ser.loc[[3, 2, 3]]
288330
tm.assert_series_equal(result, expected, check_index_type=True)
289331

290332
exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx')
291333
expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx,
292334
name='s')
293-
result = ser.loc[[3, 2, 3, 'x']]
335+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
336+
result = ser.loc[[3, 2, 3, 'x']]
294337
tm.assert_series_equal(result, expected, check_index_type=True)
295338

296339
exp_idx = Index([2, 2, 1], dtype='int64', name='idx')
@@ -300,49 +343,58 @@ def test_series_partial_set_with_name(self):
300343

301344
exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx')
302345
expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s')
303-
result = ser.loc[[2, 2, 'x', 1]]
346+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
347+
result = ser.loc[[2, 2, 'x', 1]]
304348
tm.assert_series_equal(result, expected, check_index_type=True)
305349

306350
# raises as nothing is in the index
307351
pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]])
308352

309353
exp_idx = Index([2, 2, 3], dtype='int64', name='idx')
310354
expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s')
311-
result = ser.loc[[2, 2, 3]]
355+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
356+
result = ser.loc[[2, 2, 3]]
312357
tm.assert_series_equal(result, expected, check_index_type=True)
313358

314359
exp_idx = Index([3, 4, 4], dtype='int64', name='idx')
315360
expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s')
316361
idx = Index([1, 2, 3], dtype='int64', name='idx')
317-
result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]]
362+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
363+
result = Series([0.1, 0.2, 0.3],
364+
index=idx,
365+
name='s').loc[[3, 4, 4]]
318366
tm.assert_series_equal(result, expected, check_index_type=True)
319367

320368
exp_idx = Index([5, 3, 3], dtype='int64', name='idx')
321369
expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s')
322370
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
323-
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
324-
name='s').loc[[5, 3, 3]]
371+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
372+
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
373+
name='s').loc[[5, 3, 3]]
325374
tm.assert_series_equal(result, expected, check_index_type=True)
326375

327376
exp_idx = Index([5, 4, 4], dtype='int64', name='idx')
328377
expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s')
329378
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
330-
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
331-
name='s').loc[[5, 4, 4]]
379+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
380+
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
381+
name='s').loc[[5, 4, 4]]
332382
tm.assert_series_equal(result, expected, check_index_type=True)
333383

334384
exp_idx = Index([7, 2, 2], dtype='int64', name='idx')
335385
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
336386
idx = Index([4, 5, 6, 7], dtype='int64', name='idx')
337-
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
338-
name='s').loc[[7, 2, 2]]
387+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
388+
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
389+
name='s').loc[[7, 2, 2]]
339390
tm.assert_series_equal(result, expected, check_index_type=True)
340391

341392
exp_idx = Index([4, 5, 5], dtype='int64', name='idx')
342393
expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s')
343394
idx = Index([1, 2, 3, 4], dtype='int64', name='idx')
344-
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
345-
name='s').loc[[4, 5, 5]]
395+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
396+
result = Series([0.1, 0.2, 0.3, 0.4], index=idx,
397+
name='s').loc[[4, 5, 5]]
346398
tm.assert_series_equal(result, expected, check_index_type=True)
347399

348400
# iloc

0 commit comments

Comments
 (0)