From e6a97063db4af507538e399493c6b228a8938582 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 12 Jul 2019 11:50:01 +0100 Subject: [PATCH 1/2] TST/CLN: Add message checks to raises KeyError tests --- .../tests/frame/test_axis_select_reindex.py | 6 ++- pandas/tests/frame/test_duplicates.py | 7 ++- pandas/tests/frame/test_indexing.py | 26 +++++++--- pandas/tests/frame/test_mutate_columns.py | 10 ++-- pandas/tests/generic/test_generic.py | 4 +- pandas/tests/groupby/test_timegrouper.py | 2 +- .../tests/indexes/datetimes/test_indexing.py | 8 +-- .../tests/indexes/interval/test_interval.py | 11 +++- .../indexes/interval/test_interval_new.py | 51 ++++++++++++++++--- .../indexes/interval/test_interval_tree.py | 12 +++-- .../indexes/multi/test_partial_indexing.py | 4 +- pandas/tests/indexes/multi/test_sorting.py | 2 +- pandas/tests/indexes/period/test_indexing.py | 2 +- pandas/tests/indexes/test_category.py | 6 +-- pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexes/test_range.py | 6 +-- .../tests/indexing/interval/test_interval.py | 18 +++---- .../indexing/interval/test_interval_new.py | 40 +++++++++------ pandas/tests/indexing/test_categorical.py | 6 ++- pandas/tests/indexing/test_indexing.py | 22 +++++--- pandas/tests/indexing/test_loc.py | 8 ++- pandas/tests/indexing/test_scalar.py | 8 +-- pandas/tests/io/excel/test_writers.py | 4 +- pandas/tests/io/pytables/test_pytables.py | 26 ++++++---- 24 files changed, 198 insertions(+), 95 deletions(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 77be952506964..1ef10ea5857d0 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1,4 +1,5 @@ from datetime import datetime +import re import numpy as np import pytest @@ -1120,9 +1121,10 @@ def test_raise_on_drop_duplicate_index(self, actual): # issue 19186 level = 0 if isinstance(actual.index, MultiIndex) else None - with 
pytest.raises(KeyError): + msg = re.escape("\"['c'] not found in axis\"") + with pytest.raises(KeyError, match=msg): actual.drop("c", level=level, axis=0) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): actual.T.drop("c", level=level, axis=1) expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore") assert_frame_equal(expected_no_err, actual) diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index 0ea24777ae1f5..d2a1fc43d2046 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -9,11 +11,12 @@ def test_duplicated_with_misspelled_column_name(subset): # GH 19730 df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): df.duplicated(subset) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): df.drop_duplicates(subset) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c2d38b2938fca..f0c4cf7545fba 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1,4 +1,5 @@ from datetime import date, datetime, time, timedelta +import re from warnings import catch_warnings, simplefilter import numpy as np @@ -59,7 +60,7 @@ def test_getitem(self, float_frame): ad = np.random.randn(len(df)) df["@awesome_domain"] = ad - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): df.__getitem__('df["$10"]') res = df["@awesome_domain"] @@ -67,7 +68,8 @@ def test_getitem(self, float_frame): def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) - with pytest.raises(KeyError): + msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\"" + with pytest.raises(KeyError, 
match=re.escape(msg)): df[["baf"]] def test_get(self, float_frame): @@ -446,14 +448,16 @@ def test_getitem_setitem_ix_negative_integers(self, float_frame): df = DataFrame(np.random.randn(8, 4)) # ix does label-based indexing when having an integer index + msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [index]\"" with catch_warnings(record=True): simplefilter("ignore", FutureWarning) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape(msg)): df.ix[[-1]] + msg = "\"None of [Int64Index([-1], dtype='int64')] are in the [columns]\"" with catch_warnings(record=True): simplefilter("ignore", FutureWarning) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape(msg)): df.ix[:, [-1]] # #1942 @@ -497,7 +501,11 @@ def test_setitem(self, float_frame): float_frame["col6"] = series tm.assert_series_equal(series, float_frame["col6"], check_names=False) - with pytest.raises(KeyError): + msg = ( + r"\"None of \[Float64Index\(\[.*dtype='float64'\)\] are in the" + r" \[columns\]\"" + ) + with pytest.raises(KeyError, match=msg): float_frame[np.random.randn(len(float_frame) + 1)] = 1 # set ndarray @@ -1884,10 +1892,10 @@ def test_lookup_bool(self): assert df["mask"].dtype == np.bool_ def test_lookup_raises(self, float_frame): - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'One or more row labels was not found'"): float_frame.lookup(["xyz"], ["A"]) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'One or more column labels was not found'"): float_frame.lookup([float_frame.index[0]], ["xyz"]) with pytest.raises(ValueError, match="same size"): @@ -2543,7 +2551,9 @@ def test_xs(self, float_frame, datetime_frame): assert xs["A"] == 1 assert xs["B"] == "1" - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") + ): datetime_frame.xs(datetime_frame.index[0] - BDay()) # xs get column diff --git 
a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index ed9eeb594f7f6..135ec40c8c15c 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -88,9 +90,9 @@ def test_assign_dependent_old_python(self): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) # Key C does not exist at definition time of df - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=""): df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=""): df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) @pytest.mark.skipif( @@ -219,14 +221,14 @@ def test_delitem_multiindex(self): # A still in the levels, BUT get a KeyError if trying # to delete assert ("A",) not in df.columns - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("('A',)")): del df[("A",)] # behavior of dropped/deleted MultiIndex levels changed from # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' # levels which are dropped/deleted assert "A" not in df.columns - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("('A',)")): del df["A"] def test_pop(self, float_frame): diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index b2b38980d0ceb..7b9e50ebbf342 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -618,7 +618,9 @@ def test_sample(sel): df.sample(n=1, weights="weight_column", axis=1) # Check weighting key error - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'String passed to weights not a valid column'" + ): df.sample(n=3, weights="not_a_real_column_name") # Check that re-normalizes weights that don't sum to one. 
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index d201b887739ec..e1e35d8eb7d18 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -206,7 +206,7 @@ def test_timegrouper_with_reg_groups(self): result = df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() assert_frame_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'The grouper name foo is not found'"): df.groupby([pd.Grouper(freq="1M", key="foo"), "Buyer"]).sum() # passing the level diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 2a5bbdbb131ed..cd5efc86320c2 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -614,7 +614,7 @@ def test_get_loc(self): ) with pytest.raises(ValueError, match="unit abbreviation w/o a number"): idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2000-01-01T03'"): idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") with pytest.raises( ValueError, match="tolerance size must match target index size" @@ -634,12 +634,12 @@ def test_get_loc(self): assert idx.get_loc("1999", method="nearest") == 0 assert idx.get_loc("2001", method="nearest") == 2 - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'1999'"): idx.get_loc("1999", method="pad") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2001'"): idx.get_loc("2001", method="backfill") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'foobar'"): idx.get_loc("foobar") with pytest.raises(TypeError): idx.get_loc(slice(2)) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 962ed2b1cf8ed..c61af1ce70aed 100644 --- 
a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -445,7 +445,7 @@ def test_get_loc_length_one_scalar(self, scalar, closed): result = index.get_loc(scalar) assert result == 0 else: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(scalar)): index.get_loc(scalar) @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"]) @@ -458,7 +458,14 @@ def test_get_loc_length_one_interval(self, left, right, closed, other_closed): result = index.get_loc(interval) assert result == 0 else: - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + "Interval({left}, {right}, closed='{other_closed}')".format( + left=left, right=right, other_closed=other_closed + ) + ), + ): index.get_loc(interval) # Make consistent with test_interval_new.py (see #16316, #16386) diff --git a/pandas/tests/indexes/interval/test_interval_new.py b/pandas/tests/indexes/interval/test_interval_new.py index ab9f7ef1c3e26..d92559d2e3e49 100644 --- a/pandas/tests/indexes/interval/test_interval_new.py +++ b/pandas/tests/indexes/interval/test_interval_new.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -15,16 +17,21 @@ def test_get_loc_interval(self, closed, side): for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]: # if get_loc is supplied an interval, it should only search # for exact matches, not overlaps or covers, else KeyError. 
+ msg = re.escape( + "Interval({bound[0]}, {bound[1]}, closed='{side}')".format( + bound=bound, side=side + ) + ) if closed == side: if bound == [0, 1]: assert idx.get_loc(Interval(0, 1, closed=side)) == 0 elif bound == [2, 3]: assert idx.get_loc(Interval(2, 3, closed=side)) == 1 else: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): idx.get_loc(Interval(*bound, closed=side)) else: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): idx.get_loc(Interval(*bound, closed=side)) @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]) @@ -81,18 +88,42 @@ def test_slice_locs_with_interval(self): # unsorted duplicates index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)]) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + '"Cannot get left slice bound for non-unique label:' + " Interval(0, 2, closed='right')\"" + ), + ): index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + '"Cannot get left slice bound for non-unique label:' + " Interval(0, 2, closed='right')\"" + ), + ): index.slice_locs(start=Interval(0, 2)) assert index.slice_locs(end=Interval(2, 4)) == (0, 2) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + '"Cannot get right slice bound for non-unique label:' + " Interval(0, 2, closed='right')\"" + ), + ): index.slice_locs(end=Interval(0, 2)) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + '"Cannot get right slice bound for non-unique label:' + " Interval(0, 2, closed='right')\"" + ), + ): index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) # another unsorted duplicates @@ -139,7 +170,13 @@ def test_slice_locs_with_ints_and_floats_succeeds(self): def test_slice_locs_with_ints_and_floats_errors(self, tuples, query): start, stop = query index = IntervalIndex.from_tuples(tuples) - with 
pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=( + "'can only get slices from an IntervalIndex if bounds are" + " non-overlapping and all monotonic increasing or decreasing'" + ), + ): index.slice_locs(start, stop) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index b7104242b5ccc..87f9eaa209277 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -62,7 +62,7 @@ def test_get_loc(self, tree): expected = np.array([0, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="-1"): tree.get_loc(-1) def test_get_indexer(self, tree): @@ -70,7 +70,9 @@ def test_get_indexer(self, tree): expected = np.array([0, 4, -1], dtype="intp") tm.assert_numpy_array_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): tree.get_indexer(np.array([3.0])) def test_get_indexer_non_unique(self, tree): @@ -100,7 +102,9 @@ def test_duplicates(self, dtype): expected = np.array([0, 1, 2], dtype="intp") tm.assert_numpy_array_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): tree.get_indexer(np.array([0.5])) indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) @@ -116,7 +120,7 @@ def test_get_loc_closed(self, closed): tree = IntervalTree([0], [1], closed=closed) for p, errors in [(0, tree.open_left), (1, tree.open_right)]: if errors: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(p)): tree.get_loc(p) else: result = tree.get_loc(p) diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py index d6799e86683a9..5db1296d828ca 100644 
--- a/pandas/tests/indexes/multi/test_partial_indexing.py +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -54,7 +54,7 @@ def test_partial_string_timestamp_multiindex(): # ambiguous and we don't want to extend this behavior forward to work # in multi-indexes. This would amount to selecting a scalar from a # column. - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2016-01-01'"): df["2016-01-01"] # partial string match on year only @@ -83,7 +83,7 @@ def test_partial_string_timestamp_multiindex(): tm.assert_frame_equal(result, expected) # Slicing date on first level should break (of course) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2016-01-01'"): df_swap.loc["2016-01-01"] # GH12685 (partial string with daily resolution or below) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index c62bc80cfb53f..3dee1dbecf3ba 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -115,7 +115,7 @@ def test_unsortedindex(): df.sort_index(inplace=True) assert len(df.loc(axis=0)["z", :]) == 2 - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'q'"): df.loc(axis=0)["q", :] diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 3f66891caddc3..cf03e2c7847f0 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -200,7 +200,7 @@ def test_getitem_day(self): invalid = ["2013/02/01 9H", "2013/02/01 09:00"] for v in invalid: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=v): s[v] diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 2b9632acd83ca..e79991f652154 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -697,7 +697,7 @@ def test_get_loc(self): assert cidx1.get_loc("e") == 
idx1.get_loc("e") for i in [cidx1, idx1]: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'NOT-EXIST'"): i.get_loc("NOT-EXIST") # non-unique @@ -716,7 +716,7 @@ def test_get_loc(self): assert res == 4 for i in [cidx2, idx2]: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'NOT-EXIST'"): i.get_loc("NOT-EXIST") # non-unique, sliceable @@ -733,7 +733,7 @@ def test_get_loc(self): assert res == slice(2, 5, None) for i in [cidx3, idx3]: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'c'"): i.get_loc("c") def test_repr_roundtrip(self): diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 0400b7810ecc9..605df9971a567 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -34,7 +34,9 @@ def test_droplevel(self, indices): indices.droplevel(level) for level in "wrong", ["wrong"]: - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match=re.escape("'Level wrong must be same as name (None)'") + ): indices.droplevel(level) def test_constructor_non_hashable_name(self, indices): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 213d9c6505229..58b98297f00f3 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -311,7 +311,7 @@ def test_cached_data(self): df.loc[50] assert idx._cached_data is None - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="51"): df.loc[51] assert idx._cached_data is None @@ -1027,13 +1027,13 @@ def test_engineless_lookup(self): tm.assert_numpy_array_equal( idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2])) ) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="3"): idx.get_loc(3) assert "_engine" not in idx._cache # The engine is still required for lookup of a different dtype scalar: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'a'"): assert 
idx.get_loc("a") == -1 assert "_engine" in idx._cache diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 1bdb665101d41..7ae42782774db 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -41,9 +41,9 @@ def test_nonoverlapping_monotonic(self, direction, closed): assert s[key] == expected assert s.loc[key] == expected else: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(key)): s[key] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(key)): s.loc[key] for key, expected in zip(idx.right, s): @@ -51,9 +51,9 @@ def test_nonoverlapping_monotonic(self, direction, closed): assert s[key] == expected assert s.loc[key] == expected else: - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(key)): s[key] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=str(key)): s.loc[key] for key, expected in zip(idx.mid, s): @@ -65,10 +65,10 @@ def test_non_matching(self): # this is a departure from our current # indexin scheme, but simpler - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^$"): s.loc[[-1, 3, 4, 5]] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^$"): s.loc[[-1, 3]] def test_large_series(self): @@ -93,7 +93,7 @@ def test_loc_getitem_frame(self): expected = df.iloc[4:6] tm.assert_frame_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="10"): df.loc[10] # single list-like @@ -106,9 +106,9 @@ def test_loc_getitem_frame(self): expected = df.take([4, 5, 4, 5]) tm.assert_frame_equal(result, expected) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^$"): df.loc[[10]] # partial missing - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^$"): df.loc[[10, 4]] diff --git a/pandas/tests/indexing/interval/test_interval_new.py 
b/pandas/tests/indexing/interval/test_interval_new.py index 92c71bbc6eb32..a86a9d16d3f9f 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -30,31 +32,35 @@ def test_loc_with_interval(self): tm.assert_series_equal(expected, result) # missing or not exact - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): s.loc[Interval(3, 5, closed="left")] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): s[Interval(3, 5, closed="left")] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s.loc[Interval(3, 5)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): s.loc[Interval(-2, 0)] - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): s[Interval(-2, 0)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): s.loc[Interval(5, 6)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): s[Interval(5, 6)] def test_loc_with_scalar(self): @@ -175,16 +181,16 @@ def test_loc_with_overlap(self): result = s[[Interval(1, 5), Interval(3, 7)]] tm.assert_series_equal(expected, result) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s.loc[Interval(3, 5)] - with pytest.raises(KeyError): + with 
pytest.raises(KeyError, match="^$"): s.loc[[Interval(3, 5)]] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): s[Interval(3, 5)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^$"): s[[Interval(3, 5)]] # slices with interval (only exact matches) @@ -195,15 +201,17 @@ def test_loc_with_overlap(self): result = s[Interval(1, 5) : Interval(3, 7)] tm.assert_series_equal(expected, result) - with pytest.raises(KeyError): + msg = ("'can only get slices from an IntervalIndex if bounds are" + " non-overlapping and all monotonic increasing or decreasing'") + with pytest.raises(KeyError, match=msg): s.loc[Interval(1, 6) : Interval(3, 8)] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): s[Interval(1, 6) : Interval(3, 8)] # slices with scalar raise for overlapping intervals # TODO KeyError is the appropriate error? - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): s.loc[1:4] def test_non_unique(self): diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 0dccf023c66f8..c365c985eb4b6 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -355,7 +355,11 @@ def test_loc_listlike(self): assert_frame_equal(result, expected, check_index_type=True) # not all labels in the categories - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match="'a list-indexer must only include values that are in the" + " categories'", + ): self.df2.loc[["a", "d"]] def test_loc_listlike_dtypes(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ba144909724cf..d604758565b86 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1,6 +1,7 @@ """ test fancy indexing & misc """ from datetime import datetime +import re from warnings import catch_warnings, 
simplefilter import weakref @@ -336,7 +337,12 @@ def test_dups_fancy_indexing(self): # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Index(['E'], dtype='object')] are in the [index]\"" + ), + ): dfnu.loc[["E"]] # ToDo: check_index_type can be True after GH 11497 @@ -425,7 +431,7 @@ def test_multitype_list_index_access(self): # GH 10610 df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=re.escape("'[-8, 26] not in index'")): df[[22, 26, -8]] assert df[21].shape[0] == df.shape[0] @@ -641,18 +647,18 @@ def test_string_slice(self): # dtype should properly raises KeyError df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object)) assert df.index.is_all_dates - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2011'"): df["2011"] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] df = DataFrame() assert not df.index.is_all_dates - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2011'"): df["2011"] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] def test_astype_assignment(self): @@ -855,9 +861,9 @@ def test_mixed_index_assignment(self): def test_mixed_index_no_fallback(self): # GH 19860 s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^0$"): s.at[0] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^4$"): s.at[4] def test_rhs_alignment(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d749e697c8282..9afa141b365e4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,5 +1,6 @@ """ test label based indexing 
with loc """ from io import StringIO +import re from warnings import catch_warnings, filterwarnings import numpy as np @@ -425,7 +426,12 @@ def test_loc_getitem_list_with_fail(self): s.loc[[2]] - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" + ), + ): s.loc[[3]] # a non-match and a match diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index e6ccee684b76b..3eb16e0a64b6d 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -203,9 +203,9 @@ def test_mixed_index_at_iat_loc_iloc_series(self): for i in range(len(s)): assert s.iat[i] == s.iloc[i] == i + 1 - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^4$"): s.at[4] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^4$"): s.loc[4] def test_mixed_index_at_iat_loc_iloc_dataframe(self): @@ -221,9 +221,9 @@ def test_mixed_index_at_iat_loc_iloc_dataframe(self): for i in range(5): assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^3$"): df.at[0, 3] - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="^3$"): df.loc[0, 3] def test_iat_setter_incompatible_assignment(self): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 0908ed885a6ca..8ad09549f3cbe 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1024,7 +1024,9 @@ def test_invalid_columns(self, engine, ext): read_frame = pd.read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'passes columns are not ALL present dataframe'" + ): write_frame.to_excel(self.path, "test1", columns=["C", "D"]) def test_comment_arg(self, engine, ext): diff --git 
a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 946334b5df05e..d67f2c3b7bd66 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -4,6 +4,7 @@ from distutils.version import LooseVersion from io import BytesIO import os +import re import tempfile from warnings import catch_warnings, simplefilter @@ -648,7 +649,7 @@ def test_get(self): right = store["/a"] tm.assert_series_equal(left, right) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'No object named b in the file'"): store.get("b") @pytest.mark.parametrize( @@ -1300,7 +1301,7 @@ def test_read_missing_key_close_store(self): df = pd.DataFrame({"a": range(2), "b": range(2)}) df.to_hdf(path, "k1") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'No object named k2 in the file'"): pd.read_hdf(path, "k2") # smoke test to test that file is properly closed after @@ -1953,7 +1954,7 @@ def check(obj, comparator): # 0 len df_empty = DataFrame(columns=list("ABC")) store.append("df", df_empty) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'No object named df in the file'"): store.select("df") # repeated append of 0/non-zero frames @@ -2237,7 +2238,9 @@ def test_remove(self): assert len(store) == 0 # nonexistence - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'No object named a_nonexistent_store in the file'" + ): store.remove("a_nonexistent_store") # pathing @@ -3530,7 +3533,9 @@ def test_read_column(self): store.append("df", df) # error - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match=re.escape("'column [foo] not found in the table'") + ): store.select_column("df", "foo") with pytest.raises(Exception): @@ -3780,15 +3785,16 @@ def test_select_as_multiple(self): with pytest.raises(Exception): store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1") - with pytest.raises(KeyError): + msg = "'No object 
named df3 in the file'" + with pytest.raises(KeyError, match=msg): store.select_as_multiple( ["df1", "df3"], where=["A>0", "B>0"], selector="df1" ) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=msg): store.select_as_multiple(["df3"], where=["A>0", "B>0"], selector="df1") - with pytest.raises(KeyError): + with pytest.raises(KeyError, match="'No object named df4 in the file'"): store.select_as_multiple( ["df1", "df2"], where=["A>0", "B>0"], selector="df4" ) @@ -4502,7 +4508,9 @@ def test_categorical(self): assert result is not None store.remove("df3") - with pytest.raises(KeyError): + with pytest.raises( + KeyError, match="'No object named df3/meta/s/meta in the file'" + ): store.select("df3/meta/s/meta") def test_categorical_conversion(self): From 67027c7e339eeb902a2cccfbe733bdc7dbd17090 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 12 Jul 2019 12:33:52 +0100 Subject: [PATCH 2/2] insert missing messages --- pandas/tests/frame/test_mutate_columns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 135ec40c8c15c..7ad5abca82b29 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -90,9 +90,9 @@ def test_assign_dependent_old_python(self): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) # Key C does not exist at definition time of df - with pytest.raises(KeyError, match=""): + with pytest.raises(KeyError, match="^'C'$"): df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) - with pytest.raises(KeyError, match=""): + with pytest.raises(KeyError, match="^'C'$"): df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) @pytest.mark.skipif(