From b930303e111767ccb7ecb29828d33dc555195cb4 Mon Sep 17 00:00:00 2001 From: Jose Rivera-Rubio Date: Sat, 10 Nov 2018 22:47:24 +0100 Subject: [PATCH 01/11] DOC: Updating Series.resample and DataFrame.resample docstrings (#23197) --- ci/code_checks.sh | 2 +- pandas/core/generic.py | 218 ++++++++++++++++++++++++++--------------- 2 files changed, 140 insertions(+), 80 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c4b483a794c21..eba96f0c6c2fc 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -151,7 +151,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests generic.py' ; echo $MSG pytest -q --doctest-modules pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests top-level reshaping functions' ; echo $MSG diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 53cdc46fdd16b..cfdc6b34274bf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7500,46 +7500,67 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', kind=None, loffset=None, limit=None, base=0, on=None, level=None): """ + Resample time-series data. + Convenience method for frequency conversion and resampling of time - series. Object must have a datetime-like index (DatetimeIndex, - PeriodIndex, or TimedeltaIndex), or pass datetime-like values - to the on or level keyword. + series. Object must have a datetime-like index (`DatetimeIndex`, + `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values + to the `on` or `level` keyword. 
Parameters ---------- - rule : string - the offset string or object representing target conversion - axis : int, optional, default 0 - closed : {'right', 'left'} + rule : str + The offset string or object representing target conversion. + how : str + Method for down/re-sampling, default to 'mean' for downsampling. + + .. deprecated:: 0.18.0 + The new syntax is ``.resample(...).mean()``, or + ``.resample(...).apply()`` + axis : {0 or 'index', 1 or 'columns'}, default 0 + Which axis to use for up- or down-sampling. For `Series` this + will default to 0, i.e. along the rows. Must be + `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. + fill_method : str, default None + Filling method for upsampling. + + .. deprecated:: 0.18.0 + The new syntax is ``.resample(...).()``, + e.g. ``.resample(...).pad()`` + closed : {'right', 'left'}, default None Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. - label : {'right', 'left'} + label : {'right', 'left'}, default None Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. - convention : {'start', 'end', 's', 'e'} - For PeriodIndex only, controls whether to use the start or end of - `rule` - kind: {'timestamp', 'period'}, optional + convention : {'start', 'end', 's', 'e'}, default 'start' + For `PeriodIndex` only, controls whether to use the start or + end of `rule`. + kind : {'timestamp', 'period'}, optional, default None Pass 'timestamp' to convert the resulting index to a - ``DateTimeIndex`` or 'period' to convert it to a ``PeriodIndex``. + `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. By default the input representation is retained. - loffset : timedelta - Adjust the resampled time labels + loffset : timedelta, default None + Adjust the resampled time labels. 
+ limit : int, default None + Maximum size gap when reindexing with `fill_method`. + + .. deprecated:: 0.18.0 base : int, default 0 For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0 - on : string, optional + range from 0 through 4. Defaults to 0. + on : str, optional For a DataFrame, column to use instead of index for resampling. Column must be datetime-like. .. versionadded:: 0.19.0 - level : string or int, optional + level : str or int, optional For a MultiIndex, level (name or number) to use for - resampling. Level must be datetime-like. + resampling. `level` must be datetime-like. .. versionadded:: 0.19.0 @@ -7556,6 +7577,12 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, To learn more about the offset strings, please see `this link `__. + See Also + -------- + groupby : Group by mapping, function, label, or list of labels. + Series.resample : Resample a Series. + DataFrame.resample: Resample a DataFrame. + Examples -------- @@ -7612,7 +7639,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, Upsample the series into 30 second bins. - >>> series.resample('30S').asfreq()[0:5] #select first 5 rows + >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 NaN 2000-01-01 00:01:00 1.0 @@ -7645,8 +7672,8 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, Pass a custom function via ``apply`` >>> def custom_resampler(array_like): - ... return np.sum(array_like)+5 - + ... return np.sum(array_like) + 5 + ... >>> series.resample('3T').apply(custom_resampler) 2000-01-01 00:00:00 8 2000-01-01 00:03:00 17 @@ -7656,73 +7683,106 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, For a Series with a PeriodIndex, the keyword `convention` can be used to control whether to use the start or end of `rule`. 
+ Resample a year by quarter using 'start' `convention`. Values are + assigned to the first quarter of the period. + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', - freq='A', - periods=2)) + ... freq='A', + ... periods=2)) >>> s 2012 1 2013 2 Freq: A-DEC, dtype: int64 - - Resample by month using 'start' `convention`. Values are assigned to - the first month of the period. - - >>> s.resample('M', convention='start').asfreq().head() - 2012-01 1.0 - 2012-02 NaN - 2012-03 NaN - 2012-04 NaN - 2012-05 NaN - Freq: M, dtype: float64 - - Resample by month using 'end' `convention`. Values are assigned to - the last month of the period. - - >>> s.resample('M', convention='end').asfreq() - 2012-12 1.0 - 2013-01 NaN - 2013-02 NaN - 2013-03 NaN - 2013-04 NaN - 2013-05 NaN - 2013-06 NaN - 2013-07 NaN - 2013-08 NaN - 2013-09 NaN - 2013-10 NaN - 2013-11 NaN - 2013-12 2.0 + >>> s.resample('Q', convention='start').asfreq() + 2012Q1 1.0 + 2012Q2 NaN + 2012Q3 NaN + 2012Q4 NaN + 2013Q1 2.0 + 2013Q2 NaN + 2013Q3 NaN + 2013Q4 NaN + Freq: Q-DEC, dtype: float64 + + Resample quarters by month using 'end' `convention`. Values are + assigned to the last month of the period. + + >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', + ... freq='Q', + ... periods=4)) + >>> q + 2018Q1 1 + 2018Q2 2 + 2018Q3 3 + 2018Q4 4 + Freq: Q-DEC, dtype: int64 + >>> q.resample('M', convention='end').asfreq() + 2018-03 1.0 + 2018-04 NaN + 2018-05 NaN + 2018-06 2.0 + 2018-07 NaN + 2018-08 NaN + 2018-09 3.0 + 2018-10 NaN + 2018-11 NaN + 2018-12 4.0 Freq: M, dtype: float64 - For DataFrame objects, the keyword ``on`` can be used to specify the + For DataFrame objects, the keyword `on` can be used to specify the column instead of the index for resampling. 
- >>> df = pd.DataFrame(data=9*[range(4)], columns=['a', 'b', 'c', 'd']) - >>> df['time'] = pd.date_range('1/1/2000', periods=9, freq='T') - >>> df.resample('3T', on='time').sum() - a b c d - time - 2000-01-01 00:00:00 0 3 6 9 - 2000-01-01 00:03:00 0 3 6 9 - 2000-01-01 00:06:00 0 3 6 9 - - For a DataFrame with MultiIndex, the keyword ``level`` can be used to - specify on level the resampling needs to take place. - - >>> time = pd.date_range('1/1/2000', periods=5, freq='T') - >>> df2 = pd.DataFrame(data=10*[range(4)], - columns=['a', 'b', 'c', 'd'], - index=pd.MultiIndex.from_product([time, [1, 2]]) - ) - >>> df2.resample('3T', level=0).sum() - a b c d - 2000-01-01 00:00:00 0 6 12 18 - 2000-01-01 00:03:00 0 4 8 12 - - See also - -------- - groupby : Group by mapping, function, label, or list of labels. + >>> d = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df = pd.DataFrame(d) + >>> df['week_starting'] = pd.date_range('01/01/2018', + ... periods=8, + ... freq='W') + >>> df + price volume week_starting + 0 10 50 2018-01-07 + 1 11 60 2018-01-14 + 2 9 40 2018-01-21 + 3 13 100 2018-01-28 + 4 14 50 2018-02-04 + 5 18 100 2018-02-11 + 6 17 40 2018-02-18 + 7 19 50 2018-02-25 + >>> df.resample('M', on='week_starting').mean() + price volume + week_starting + 2018-01-31 10.75 62.5 + 2018-02-28 17.00 60.0 + + For a DataFrame with MultiIndex, the keyword `level` can be used to + specify on which level the resampling needs to take place. + + >>> days = pd.date_range('1/1/2000', periods=4, freq='D') + >>> d2 = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df2 = pd.DataFrame(d2, + ... index=pd.MultiIndex.from_product([days, + ... ['morning', + ... 'afternoon']] + ... 
)) + >>> df2 + price volume + 2000-01-01 morning 10 50 + afternoon 11 60 + 2000-01-02 morning 9 40 + afternoon 13 100 + 2000-01-03 morning 14 50 + afternoon 18 100 + 2000-01-04 morning 17 40 + afternoon 19 50 + >>> df2.resample('D', level=0).sum() + price volume + 2000-01-01 21 110 + 2000-01-02 22 140 + 2000-01-03 32 150 + 2000-01-04 36 90 """ + from pandas.core.resample import (resample, _maybe_process_deprecations) axis = self._get_axis_number(axis) From 383d0525831d7301ae4ab26bb8c2d475ee4ab72a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 10 Nov 2018 14:13:34 -0800 Subject: [PATCH 02/11] MAINT: tm.assert_raises_regex --> pytest.raises (#23592) * MAINT: tm.assert_raises_regex --> pytest.raises pytest.raises has all of the functionality that we need from tm.assert_raises_regex. Closes gh-16521. * Don't remove, just deprecate assert_raises_regex * CLN: Test cleanups and follow-ups --- pandas/tests/arithmetic/test_datetime64.py | 18 +- pandas/tests/arithmetic/test_period.py | 54 +++--- pandas/tests/arithmetic/test_timedelta64.py | 24 +-- pandas/tests/arrays/categorical/test_algos.py | 2 +- .../arrays/categorical/test_analytics.py | 3 +- pandas/tests/arrays/categorical/test_api.py | 4 +- .../arrays/categorical/test_constructors.py | 12 +- .../tests/arrays/categorical/test_dtypes.py | 2 +- .../tests/arrays/categorical/test_indexing.py | 2 +- .../tests/arrays/categorical/test_missing.py | 2 +- .../arrays/categorical/test_operators.py | 8 +- .../tests/arrays/categorical/test_sorting.py | 9 +- pandas/tests/arrays/interval/test_interval.py | 2 +- pandas/tests/arrays/interval/test_ops.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 87 +++++---- pandas/tests/arrays/sparse/test_dtype.py | 3 +- pandas/tests/arrays/sparse/test_libsparse.py | 14 +- pandas/tests/arrays/test_integer.py | 14 +- pandas/tests/arrays/test_period.py | 26 +-- pandas/tests/computation/test_eval.py | 54 +++--- pandas/tests/dtypes/test_common.py | 3 +- pandas/tests/dtypes/test_dtypes.py | 18 +- 
pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/base/constructors.py | 3 +- pandas/tests/extension/base/getitem.py | 5 +- pandas/tests/extension/base/methods.py | 4 +- pandas/tests/extension/base/setitem.py | 7 +- .../tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/extension/test_categorical.py | 3 +- pandas/tests/extension/test_interval.py | 3 +- pandas/tests/extension/test_period.py | 3 +- pandas/tests/frame/test_alter_axes.py | 64 +++---- pandas/tests/frame/test_analytics.py | 23 +-- pandas/tests/frame/test_api.py | 19 +- pandas/tests/frame/test_arithmetic.py | 12 +- .../tests/frame/test_axis_select_reindex.py | 38 ++-- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_combine_concat.py | 14 +- pandas/tests/frame/test_constructors.py | 80 ++++---- pandas/tests/frame/test_dtypes.py | 22 +-- pandas/tests/frame/test_indexing.py | 43 +++-- pandas/tests/frame/test_join.py | 11 +- pandas/tests/frame/test_missing.py | 18 +- pandas/tests/frame/test_mutate_columns.py | 6 +- pandas/tests/frame/test_nonunique_indexes.py | 7 +- pandas/tests/frame/test_operators.py | 34 ++-- pandas/tests/frame/test_period.py | 5 +- pandas/tests/frame/test_quantile.py | 2 +- pandas/tests/frame/test_query_eval.py | 22 ++- pandas/tests/frame/test_rank.py | 4 +- pandas/tests/frame/test_replace.py | 15 +- pandas/tests/frame/test_reshape.py | 4 +- pandas/tests/frame/test_sorting.py | 20 +- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/frame/test_timeseries.py | 31 ++- pandas/tests/frame/test_to_csv.py | 16 +- pandas/tests/frame/test_validate.py | 3 +- pandas/tests/generic/test_generic.py | 61 +++--- .../generic/test_label_or_level_utils.py | 28 +-- .../tests/groupby/aggregate/test_aggregate.py | 6 +- pandas/tests/groupby/aggregate/test_cython.py | 6 +- pandas/tests/groupby/aggregate/test_other.py | 4 +- pandas/tests/groupby/test_filters.py | 6 +- pandas/tests/groupby/test_function.py | 
14 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_grouping.py | 16 +- pandas/tests/groupby/test_rank.py | 4 +- pandas/tests/groupby/test_transform.py | 10 +- pandas/tests/groupby/test_whitelist.py | 2 +- pandas/tests/indexes/common.py | 164 ++++++++-------- pandas/tests/indexes/datetimes/test_astype.py | 2 +- .../indexes/datetimes/test_construction.py | 8 +- .../indexes/datetimes/test_date_range.py | 42 ++--- .../tests/indexes/datetimes/test_datetime.py | 9 +- .../tests/indexes/datetimes/test_indexing.py | 32 ++-- pandas/tests/indexes/datetimes/test_ops.py | 24 ++- .../indexes/datetimes/test_partial_slicing.py | 28 +-- .../indexes/datetimes/test_scalar_compat.py | 10 +- pandas/tests/indexes/datetimes/test_tools.py | 12 +- pandas/tests/indexes/interval/test_astype.py | 16 +- .../indexes/interval/test_construction.py | 30 +-- .../tests/indexes/interval/test_interval.py | 20 +- .../indexes/interval/test_interval_new.py | 2 +- .../indexes/interval/test_interval_range.py | 42 ++--- pandas/tests/indexes/multi/test_analytics.py | 24 +-- pandas/tests/indexes/multi/test_astype.py | 7 +- pandas/tests/indexes/multi/test_compat.py | 49 ++--- .../tests/indexes/multi/test_constructor.py | 73 ++++---- pandas/tests/indexes/multi/test_conversion.py | 6 +- .../tests/indexes/multi/test_equivalence.py | 18 +- pandas/tests/indexes/multi/test_get_set.py | 38 ++-- pandas/tests/indexes/multi/test_indexing.py | 26 ++- pandas/tests/indexes/multi/test_integrity.py | 36 ++-- pandas/tests/indexes/multi/test_join.py | 4 +- pandas/tests/indexes/multi/test_missing.py | 8 +- pandas/tests/indexes/multi/test_names.py | 31 ++- pandas/tests/indexes/multi/test_reindex.py | 19 +- pandas/tests/indexes/multi/test_reshape.py | 2 +- pandas/tests/indexes/multi/test_set_ops.py | 42 ++--- pandas/tests/indexes/multi/test_sorting.py | 12 +- pandas/tests/indexes/period/test_astype.py | 2 +- .../tests/indexes/period/test_construction.py | 30 +-- 
pandas/tests/indexes/period/test_indexing.py | 33 ++-- pandas/tests/indexes/period/test_ops.py | 14 +- .../indexes/period/test_partial_slicing.py | 12 +- pandas/tests/indexes/period/test_period.py | 10 +- .../tests/indexes/period/test_period_range.py | 16 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/period/test_tools.py | 8 +- pandas/tests/indexes/test_base.py | 77 ++++---- pandas/tests/indexes/test_category.py | 30 +-- pandas/tests/indexes/test_numeric.py | 20 +- pandas/tests/indexes/test_range.py | 14 +- .../tests/indexes/timedeltas/test_astype.py | 2 +- .../indexes/timedeltas/test_construction.py | 2 +- .../tests/indexes/timedeltas/test_indexing.py | 23 ++- pandas/tests/indexes/timedeltas/test_ops.py | 20 +- .../timedeltas/test_partial_slicing.py | 13 +- .../indexes/timedeltas/test_scalar_compat.py | 9 +- .../indexes/timedeltas/test_timedelta.py | 4 +- .../timedeltas/test_timedelta_range.py | 14 +- pandas/tests/indexes/timedeltas/test_tools.py | 4 +- pandas/tests/indexing/test_categorical.py | 21 +-- pandas/tests/indexing/test_coercion.py | 14 +- pandas/tests/indexing/test_floats.py | 6 +- pandas/tests/indexing/test_iloc.py | 14 +- pandas/tests/indexing/test_indexing.py | 19 +- pandas/tests/indexing/test_multiindex.py | 12 +- pandas/tests/indexing/test_scalar.py | 4 +- pandas/tests/internals/test_internals.py | 8 +- pandas/tests/io/formats/test_style.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 6 +- pandas/tests/io/formats/test_to_html.py | 2 +- pandas/tests/io/json/test_compression.py | 10 +- .../tests/io/json/test_json_table_schema.py | 8 +- pandas/tests/io/json/test_pandas.py | 15 +- pandas/tests/io/json/test_readlines.py | 4 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/msgpack/test_except.py | 5 +- pandas/tests/io/msgpack/test_limits.py | 11 +- pandas/tests/io/msgpack/test_sequnpack.py | 3 +- pandas/tests/io/parser/c_parser_only.py | 16 +- pandas/tests/io/parser/common.py | 50 ++--- 
pandas/tests/io/parser/compression.py | 17 +- pandas/tests/io/parser/converters.py | 2 +- pandas/tests/io/parser/dialect.py | 4 +- pandas/tests/io/parser/header.py | 6 +- pandas/tests/io/parser/parse_dates.py | 22 +-- pandas/tests/io/parser/python_parser_only.py | 16 +- pandas/tests/io/parser/quoting.py | 38 ++-- pandas/tests/io/parser/skiprows.py | 5 +- pandas/tests/io/parser/test_read_fwf.py | 15 +- pandas/tests/io/parser/test_unsupported.py | 24 +-- pandas/tests/io/parser/usecols.py | 39 ++-- pandas/tests/io/sas/test_sas.py | 6 +- pandas/tests/io/test_common.py | 9 +- pandas/tests/io/test_excel.py | 12 +- pandas/tests/io/test_html.py | 28 ++- pandas/tests/io/test_pickle.py | 5 +- pandas/tests/io/test_pytables.py | 25 +-- pandas/tests/io/test_sql.py | 2 +- pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/reshape/merge/test_join.py | 4 +- pandas/tests/reshape/merge/test_merge.py | 10 +- pandas/tests/reshape/merge/test_merge_asof.py | 5 +- .../tests/reshape/merge/test_merge_ordered.py | 5 +- pandas/tests/reshape/test_concat.py | 14 +- pandas/tests/reshape/test_melt.py | 2 +- pandas/tests/reshape/test_pivot.py | 10 +- pandas/tests/reshape/test_tile.py | 4 +- .../tests/reshape/test_union_categoricals.py | 14 +- pandas/tests/reshape/test_util.py | 15 +- pandas/tests/scalar/interval/test_interval.py | 31 ++- pandas/tests/scalar/interval/test_ops.py | 3 +- pandas/tests/scalar/period/test_asfreq.py | 20 +- pandas/tests/scalar/period/test_period.py | 44 ++--- .../scalar/timedelta/test_construction.py | 24 +-- .../tests/scalar/timestamp/test_timestamp.py | 12 +- .../tests/scalar/timestamp/test_timezones.py | 12 +- .../tests/scalar/timestamp/test_unary_ops.py | 2 +- .../tests/series/indexing/test_alter_index.py | 4 +- pandas/tests/series/indexing/test_boolean.py | 8 +- pandas/tests/series/indexing/test_indexing.py | 4 +- pandas/tests/series/test_alter_axes.py | 16 +- pandas/tests/series/test_analytics.py | 35 ++-- pandas/tests/series/test_api.py | 29 ++- 
pandas/tests/series/test_arithmetic.py | 4 +- pandas/tests/series/test_combine_concat.py | 4 +- pandas/tests/series/test_constructors.py | 14 +- pandas/tests/series/test_datetime_values.py | 8 +- pandas/tests/series/test_dtypes.py | 4 +- pandas/tests/series/test_missing.py | 30 +-- pandas/tests/series/test_operators.py | 12 +- pandas/tests/series/test_quantile.py | 2 +- pandas/tests/series/test_rank.py | 4 +- pandas/tests/series/test_replace.py | 8 +- pandas/tests/series/test_timeseries.py | 10 +- pandas/tests/series/test_timezones.py | 10 +- pandas/tests/series/test_validate.py | 4 +- pandas/tests/sparse/frame/test_frame.py | 20 +- pandas/tests/sparse/series/test_series.py | 28 +-- pandas/tests/sparse/test_indexing.py | 2 +- pandas/tests/test_algos.py | 42 ++--- pandas/tests/test_base.py | 22 +-- pandas/tests/test_errors.py | 7 +- pandas/tests/test_expressions.py | 12 +- pandas/tests/test_multilevel.py | 40 ++-- pandas/tests/test_panel.py | 79 ++++---- pandas/tests/test_register_accessor.py | 2 +- pandas/tests/test_resample.py | 20 +- pandas/tests/test_sorting.py | 12 +- pandas/tests/test_strings.py | 177 ++++++++---------- pandas/tests/test_take.py | 8 +- pandas/tests/test_window.py | 48 ++--- pandas/tests/tools/test_numeric.py | 14 +- pandas/tests/tseries/offsets/test_fiscal.py | 6 +- pandas/tests/tseries/test_frequencies.py | 42 ++--- pandas/tests/tslibs/test_libfrequencies.py | 4 +- pandas/tests/tslibs/test_parsing.py | 4 +- pandas/tests/util/test_hashing.py | 6 +- pandas/tests/util/test_testing.py | 139 +++++++------- pandas/tests/util/test_util.py | 44 +++-- pandas/util/testing.py | 7 + 224 files changed, 1973 insertions(+), 2032 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index c3ebd8f773aa6..73921a18ee5c7 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -687,7 +687,7 @@ def check(get_ser, test_ser): # with 'operate' (from 
core/ops.py) for the ops that are not # defined op = getattr(get_ser, op_str, None) - with tm.assert_raises_regex(TypeError, 'operate|cannot'): + with pytest.raises(TypeError, match='operate|cannot'): op(test_ser) # ## timedelta64 ### @@ -1042,9 +1042,9 @@ def test_dti_add_timestamp_raises(self, box_with_datetime): idx = DatetimeIndex(['2011-01-01', '2011-01-02']) idx = tm.box_expected(idx, box_with_datetime) msg = "cannot add" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx + Timestamp('2011-01-01') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01') + idx # ------------------------------------------------------------- @@ -1268,7 +1268,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract .*TimedeltaIndex' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi - dti # sub with timedelta64 array @@ -1276,7 +1276,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract DatetimeIndex from' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi.values - dti def test_dti_isub_tdi(self, tz_naive_fixture): @@ -1292,7 +1292,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): tm.assert_index_equal(result, expected) msg = 'cannot subtract .*TimedeltaIndex' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi -= dti # isub with timedelta64 array @@ -1303,7 +1303,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): msg = '|'.join(['cannot perform __neg__ with this index type:', 'ufunc subtract cannot use operands with types', 'cannot subtract DatetimeIndex from']) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): tdi.values -= dti # ------------------------------------------------------------- 
@@ -1323,9 +1323,9 @@ def test_add_datetimelike_and_dti(self, addend, tz): # GH#9631 dti = DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize(tz) msg = 'cannot add DatetimeIndex and {0}'.format(type(addend).__name__) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dti + addend - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): addend + dti # ------------------------------------------------------------- diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 3595cf7a2522f..687d07082ea33 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -118,27 +118,27 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_df_fail): base = tm.box_expected(base, box) msg = "Input has different freq=A-DEC from " - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= Period('2011', freq='A') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): Period('2011', freq='A') >= base # TODO: Could parametrize over boxes for idx? 
idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='A') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= idx # Different frequency msg = "Input has different freq=4M from " - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= Period('2011', freq='4M') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): Period('2011', freq='4M') >= base idx = PeriodIndex(['2011', '2012', '2013', '2014'], freq='4M') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): base <= idx @pytest.mark.parametrize('freq', ['M', '2M', '3M']) @@ -190,10 +190,10 @@ def test_pi_cmp_nat_mismatched_freq_raises(self, freq): diff = PeriodIndex(['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') msg = "Input has different freq=4M from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): idx1 > diff - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): idx1 == diff # TODO: De-duplicate with test_pi_cmp_nat @@ -708,13 +708,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_daily(self, not_daily): other = not_daily rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') msg = 'Input has different freq(=.+)? 
from Period.*?\\(freq=D\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): @@ -734,10 +734,10 @@ def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') msg = 'Input has different freq(=.+)? from Period.*?\\(freq=H\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): @@ -768,13 +768,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_annual(self, rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? 
' 'from Period.*?\\(freq=A-DEC\\)') - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_pi_add_iadd_timedeltalike_M(self): @@ -792,13 +792,13 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? from Period.*?\\(freq=M\\)' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng + other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng += other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng - other - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): rng -= other def test_parr_add_sub_td64_nat(self, box): @@ -907,14 +907,14 @@ def test_pi_ops_errors(self, ng, box_with_period): obj = tm.box_expected(idx, box_with_period) msg = r"unsupported operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj + ng with pytest.raises(TypeError): # error message differs between PY2 and 3 ng + obj - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj - ng with pytest.raises(TypeError): @@ -1009,13 +1009,13 @@ def 
test_pi_offset_errors(self): # from Period msg = r"Input has different freq from Period.*?\(freq=D\)" for obj in [idx, ser]: - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): obj + pd.offsets.Hour(2) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pd.offsets.Hour(2) + obj - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): obj - pd.offsets.Hour(2) def test_pi_sub_period(self): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 902d0716aed8d..f92a772f3eaad 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -161,22 +161,22 @@ def test_tdi_add_timestamp_nat_masking(self): def test_tdi_add_overflow(self): # See GH#14068 msg = "too (big|large) to convert" - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta(106580, 'D') + Timestamp('2000') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Timestamp('2000') + pd.to_timedelta(106580, 'D') _NaT = int(pd.NaT) + 1 msg = "Overflow in int64 addition" - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta([106580], 'D') + Timestamp('2000') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Timestamp('2000') + pd.to_timedelta([106580], 'D') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta([_NaT]) - Timedelta('1 days') - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): pd.to_timedelta(['5 days', _NaT]) - Timedelta('1 days') - with 
tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): (pd.to_timedelta([_NaT, '5 days', '1 hours']) - pd.to_timedelta(['7 seconds', _NaT, '4 hours'])) @@ -415,7 +415,7 @@ def test_td64arr_sub_timestamp_raises(self, box): msg = ("cannot subtract a datelike from|" "Could not operate|" "cannot perform operation") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx - Timestamp('2011-01-01') def test_td64arr_add_timestamp(self, box, tz_naive_fixture): @@ -1217,9 +1217,9 @@ def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td): # with 'operate' (from core/ops.py) for the ops that are not # defined pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): td1 * scalar_td - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): scalar_td * td1 def test_td64arr_mul_too_short_raises(self, box): @@ -1399,8 +1399,8 @@ def test_td64arr_pow_invalid(self, scalar_td, box): # with 'operate' (from core/ops.py) for the ops that are not # defined pattern = 'operate|unsupported|cannot|not supported' - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): scalar_td ** td1 - with tm.assert_raises_regex(TypeError, pattern): + with pytest.raises(TypeError, match=pattern): td1 ** scalar_td diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index e7dc67c5d6a5b..50f643756c5dc 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -138,5 +138,5 @@ def test_take_fill_value_new_raises(self): # https://github.com/pandas-dev/pandas/issues/23296 cat = pd.Categorical(['a', 'b', 'c']) xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." 
- with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): cat.take([0, 1, -1], fill_value='d', allow_fill=True) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 0f292a457bbc2..ea6facd66a1a3 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -305,7 +305,8 @@ def test_numpy_repeat(self): tm.assert_categorical_equal(np.repeat(cat, 2), exp) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(cat, 2, axis=1) def test_isna(self): exp = np.array([False, False, True]) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 54de398473d52..ec90995e6084b 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -50,9 +50,9 @@ def test_set_ordered(self): # removed in 0.19.0 msg = "can\'t set attribute" - with tm.assert_raises_regex(AttributeError, msg): + with pytest.raises(AttributeError, match=msg): cat.ordered = True - with tm.assert_raises_regex(AttributeError, msg): + with pytest.raises(AttributeError, match=msg): cat.ordered = False def test_rename_categories(self): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 8bd245d2aabae..a473f44d5d4aa 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -26,10 +26,10 @@ def test_validate_ordered(self): # This should be a boolean. 
ordered = np.array([0, 1, 2]) - with tm.assert_raises_regex(exp_err, exp_msg): + with pytest.raises(exp_err, match=exp_msg): Categorical([1, 2, 3], ordered=ordered) - with tm.assert_raises_regex(exp_err, exp_msg): + with pytest.raises(exp_err, match=exp_msg): Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'], ordered=ordered) @@ -351,13 +351,13 @@ def test_constructor_with_dtype(self, ordered): def test_constructor_dtype_and_others_raises(self): dtype = CategoricalDtype(['a', 'b'], ordered=True) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], categories=['a', 'b'], dtype=dtype) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], ordered=True, dtype=dtype) - with tm.assert_raises_regex(ValueError, "Cannot"): + with pytest.raises(ValueError, match="Cannot"): Categorical(['a', 'b'], ordered=False, dtype=dtype) @pytest.mark.parametrize('categories', [ @@ -372,7 +372,7 @@ def test_constructor_str_category(self, categories, ordered): tm.assert_categorical_equal(result, expected) def test_constructor_str_unknown(self): - with tm.assert_raises_regex(ValueError, "Unknown `dtype`"): + with pytest.raises(ValueError, match="Unknown `dtype`"): Categorical([1, 2], dtype="foo") def test_constructor_from_categorical_with_dtype(self): diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 491a7867fee71..66f08355e7516 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -120,7 +120,7 @@ def test_astype(self, ordered): tm.assert_numpy_array_equal(result, expected) msg = 'could not convert string to float' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): cat.astype(float) # numeric diff --git a/pandas/tests/arrays/categorical/test_indexing.py 
b/pandas/tests/arrays/categorical/test_indexing.py index a54ee7381f9eb..8df5728f7d895 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -143,5 +143,5 @@ def test_mask_with_boolean_raises(index): if index: idx = CategoricalIndex(idx) - with tm.assert_raises_regex(ValueError, 'NA / NaN'): + with pytest.raises(ValueError, match='NA / NaN'): s[idx] diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 32698d190d93c..b4b361dabac61 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -70,7 +70,7 @@ def test_fillna_raises(self, fillna_kwargs, msg): # https://github.com/pandas-dev/pandas/issues/19682 cat = Categorical([1, 2, 3]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): cat.fillna(**fillna_kwargs) @pytest.mark.parametrize("named", [True, False]) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index ce15ebfb281f2..f216865faa2ad 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -238,15 +238,17 @@ def test_unordered_different_order_equal(self, ctor): def test_unordered_different_categories_raises(self): c1 = Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) c2 = Categorical(['a', 'c'], categories=['c', 'a'], ordered=False) - with tm.assert_raises_regex(TypeError, - "Categoricals can only be compared"): + + with pytest.raises(TypeError, match=("Categoricals can " + "only be compared")): c1 == c2 def test_compare_different_lengths(self): c1 = Categorical([], categories=['a', 'b']) c2 = Categorical([], categories=['a']) + msg = "Categories are different lengths" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): c1 == c2 def 
test_compare_unordered_different_order(self): diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py index 922d9fdb788b1..3d55862cd2cc0 100644 --- a/pandas/tests/arrays/categorical/test_sorting.py +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest from pandas import Categorical, Index import pandas.util.testing as tm @@ -30,12 +31,12 @@ def test_numpy_argsort(self): check_dtype=False) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, axis=0) + with pytest.raises(ValueError, match=msg): + np.argsort(c, axis=0) msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - c, order='C') + with pytest.raises(ValueError, match=msg): + np.argsort(c, order='C') def test_sort_values(self): diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 9a191dda3a73a..a04579dbbb6b1 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -38,7 +38,7 @@ def test_repeat(self, left_right_dtypes, repeats): ('foo', r'invalid literal for (int|long)\(\) with base 10')]) def test_repeat_errors(self, bad_repeats, msg): array = IntervalArray.from_breaks(range(4)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): array.repeat(bad_repeats) @pytest.mark.parametrize('new_closed', [ diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py index 45bf465577ace..bdbd145ed2a80 100644 --- a/pandas/tests/arrays/interval/test_ops.py +++ b/pandas/tests/arrays/interval/test_ops.py @@ -78,5 +78,5 @@ def test_overlaps_invalid_type(self, constructor, other): interval_container = constructor.from_breaks(range(5)) msg = '`other` must be Interval-like, got {other}'.format( 
other=type(other).__name__) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_container.overlaps(other) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 852c4fb910560..04d7f4d498c2b 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -97,7 +97,7 @@ def test_constructor_object_dtype(self): @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) def test_constructor_na_dtype(self, dtype): - with tm.assert_raises_regex(ValueError, "Cannot convert"): + with pytest.raises(ValueError, match="Cannot convert"): SparseArray([0, 1, np.nan], dtype=dtype) def test_constructor_spindex_dtype(self): @@ -224,13 +224,18 @@ def test_get_item(self): assert self.zarr[7] == 5 errmsg = re.compile("bounds") - tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[11]) - tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[-11]) + + with pytest.raises(IndexError, match=errmsg): + self.arr[11] + + with pytest.raises(IndexError, match=errmsg): + self.arr[-11] + assert self.arr[-1] == self.arr[len(self.arr) - 1] def test_take_scalar_raises(self): msg = "'indices' must be an array, not a scalar '2'." 
- with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.arr.take(2) def test_take(self): @@ -258,8 +263,8 @@ def test_take_negative(self): tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) def test_bad_take(self): - tm.assert_raises_regex( - IndexError, "bounds", lambda: self.arr.take([11])) + with pytest.raises(IndexError, match="bounds"): + self.arr.take([11]) def test_take_filling(self): # similar tests as GH 12631 @@ -279,10 +284,11 @@ def test_take_filling(self): expected = SparseArray([np.nan, np.nan, 4]) tm.assert_sp_array_equal(result, expected) - msg = ("Invalid value in 'indices'") - with tm.assert_raises_regex(ValueError, msg): + msg = "Invalid value in 'indices'" + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) - with tm.assert_raises_regex(ValueError, msg): + + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) with pytest.raises(IndexError): @@ -314,9 +320,9 @@ def test_take_filling_fill_value(self): tm.assert_sp_array_equal(result, expected) msg = ("Invalid value in 'indices'.") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -2]), allow_fill=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): sparse.take(np.array([1, 0, -5]), allow_fill=True) with pytest.raises(IndexError): @@ -351,12 +357,15 @@ def setitem(): def setslice(): self.arr[1:5] = 2 - tm.assert_raises_regex(TypeError, "item assignment", setitem) - tm.assert_raises_regex(TypeError, "item assignment", setslice) + with pytest.raises(TypeError, match="item assignment"): + setitem() + + with pytest.raises(TypeError, match="item assignment"): + setslice() def test_constructor_from_too_large_array(self): - tm.assert_raises_regex(TypeError, "expected dimension <= 1 data", - SparseArray, np.arange(10).reshape((2, 5))) + with 
pytest.raises(TypeError, match="expected dimension <= 1 data"): + SparseArray(np.arange(10).reshape((2, 5))) def test_constructor_from_sparse(self): res = SparseArray(self.zarr) @@ -441,7 +450,7 @@ def test_astype(self): tm.assert_sp_array_equal(result, expected) arr = SparseArray([0, np.nan, 0, 1], fill_value=0) - with tm.assert_raises_regex(ValueError, 'NA'): + with pytest.raises(ValueError, match='NA'): arr.astype('Sparse[i8]') def test_astype_bool(self): @@ -481,12 +490,12 @@ def test_set_fill_value(self): # sparsearray with NaN fill value, why not update one? # coerces to int # msg = "unable to set fill_value 3\\.1 to int64 dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = 3.1 assert arr.fill_value == 3.1 # msg = "unable to set fill_value nan to int64 dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = np.nan assert np.isnan(arr.fill_value) @@ -496,12 +505,12 @@ def test_set_fill_value(self): # coerces to bool # msg = "unable to set fill_value 0 to bool dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = 0 assert arr.fill_value == 0 # msg = "unable to set fill_value nan to bool dtype" - # with tm.assert_raises_regex(ValueError, msg): + # with pytest.raises(ValueError, match=msg): arr.fill_value = np.nan assert np.isnan(arr.fill_value) @@ -510,7 +519,7 @@ def test_set_fill_invalid_non_scalar(self, val): arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) msg = "fill_value must be a scalar" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): arr.fill_value = val def test_copy_shallow(self): @@ -793,8 +802,8 @@ def test_numpy_all(self, data, pos, neg): # raises with a different message on py2. 
msg = "the \'out\' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.all, - SparseArray(data), out=np.array([])) + with pytest.raises(ValueError, match=msg): + np.all(SparseArray(data), out=np.array([])) @pytest.mark.parametrize('data,pos,neg', [ ([False, True, False], True, False), @@ -838,8 +847,8 @@ def test_numpy_any(self, data, pos, neg): assert not out msg = "the \'out\' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.any, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.any(SparseArray(data), out=out) def test_sum(self): data = np.arange(10).astype(float) @@ -866,12 +875,12 @@ def test_numpy_sum(self): assert out == 40.0 msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.sum, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.sum, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), out=out) @pytest.mark.parametrize("data,expected", [ (np.array([1, 2, 3, 4, 5], dtype=float), # non-null data @@ -894,16 +903,16 @@ def test_cumsum(self, data, expected, numpy): if numpy: # numpy compatibility checks. msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), out=out) else: axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. 
msg = "axis\\(={axis}\\) out of bounds".format(axis=axis) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): SparseArray(data).cumsum(axis=axis) def test_mean(self): @@ -925,12 +934,12 @@ def test_numpy_mean(self): assert out == 40.0 / 9 msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.mean, - SparseArray(data), dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.mean, - SparseArray(data), out=out) + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), out=out) def test_ufunc(self): # GH 13853 make sure ufunc is applied to fill_value @@ -1042,7 +1051,7 @@ def test_to_coo(self): def test_non_sparse_raises(self): ser = pd.Series([1, 2, 3]) - with tm.assert_raises_regex(AttributeError, '.sparse'): + with pytest.raises(AttributeError, match='.sparse'): ser.sparse.density diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 75fc325b07a08..7c310693cf26c 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -3,7 +3,6 @@ import pandas as pd from pandas.core.sparse.api import SparseDtype -import pandas.util.testing as tm @pytest.mark.parametrize("dtype, fill_value", [ @@ -138,5 +137,5 @@ def test_parse_subtype(string, expected): "Sparse[bool, True]", ]) def test_construct_from_string_fill_value_raises(string): - with tm.assert_raises_regex(TypeError, 'fill_value in the string is not'): + with pytest.raises(TypeError, match='fill_value in the string is not'): SparseDtype.construct_from_string(string) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index cbad7e8e9136d..6e9d790bf85f3 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ 
b/pandas/tests/arrays/sparse/test_libsparse.py @@ -478,37 +478,37 @@ def test_check_integrity(self): # Too many indices than specified in self.length msg = "Too many indices" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=1, indices=[1, 2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, -2, 3]) # No index can be negative. msg = "No index can be less than zero" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, -2, 3]) # All indices must be less than the length. msg = "All indices must be less than the length" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 2, 5]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 2, 6]) # Indices must be strictly ascending. 
msg = "Indices must be strictly increasing" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 3, 2]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntIndex(length=5, indices=[1, 3, 3]) def test_int_internal(self): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 0fe07caed5b85..10f54458e4980 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -453,17 +453,17 @@ def test_construct_cast_invalid(self, dtype): msg = "cannot safely" arr = [1.2, 2.3, 3.7] - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): integer_array(arr, dtype=dtype) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(arr).astype(dtype) arr = [1.2, 2.3, 3.7, np.nan] - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): integer_array(arr, dtype=dtype) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(arr).astype(dtype) @@ -683,11 +683,11 @@ def test_reduce_to_float(op): def test_astype_nansafe(): - # https://github.com/pandas-dev/pandas/pull/22343 + # see gh-22343 arr = integer_array([np.nan, 1, 2], dtype="Int8") + msg = "cannot convert float NaN to integer" - with tm.assert_raises_regex( - ValueError, 'cannot convert float NaN to integer'): + with pytest.raises(ValueError, match=msg): arr.astype('uint32') diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 0125729048cdd..95a1d1781456c 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -53,22 +53,22 @@ def test_from_datetime64_freq_changes(): "Input has different freq"), ]) def test_period_array_raises(data, freq, msg): - with tm.assert_raises_regex(IncompatibleFrequency, msg): + with 
pytest.raises(IncompatibleFrequency, match=msg): period_array(data, freq) def test_period_array_non_period_series_raies(): ser = pd.Series([1, 2, 3]) - with tm.assert_raises_regex(TypeError, 'dtype'): + with pytest.raises(TypeError, match='dtype'): PeriodArray(ser, freq='D') def test_period_array_freq_mismatch(): arr = period_array(['2000', '2001'], freq='D') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): PeriodArray(arr, freq='M') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) @@ -80,11 +80,11 @@ def test_asi8(): def test_take_raises(): arr = period_array(['2000', '2001'], freq='D') - with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + with pytest.raises(IncompatibleFrequency, match='freq'): arr.take([0, -1], allow_fill=True, fill_value=pd.Period('2000', freq='W')) - with tm.assert_raises_regex(ValueError, 'foo'): + with pytest.raises(ValueError, match='foo'): arr.take([0, -1], allow_fill=True, fill_value='foo') @@ -129,13 +129,13 @@ def test_astype_period(): def test_astype_datetime(other): arr = period_array(['2000', '2001', None], freq='D') # slice off the [ns] so that the regex matches. 
- with tm.assert_raises_regex(TypeError, other[:-4]): + with pytest.raises(TypeError, match=other[:-4]): arr.astype(other) def test_fillna_raises(): arr = period_array(['2000', '2001', '2002'], freq='D') - with tm.assert_raises_regex(ValueError, 'Length'): + with pytest.raises(ValueError, match='Length'): arr.fillna(arr[:2]) @@ -167,23 +167,23 @@ def test_setitem(key, value, expected): def test_setitem_raises_incompatible_freq(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr[0] = pd.Period("2000", freq="A") other = period_array(['2000', '2001'], freq='A') - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr[[0, 1]] = other def test_setitem_raises_length(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(ValueError, "length"): + with pytest.raises(ValueError, match="length"): arr[[0, 1]] = [pd.Period("2000", freq="D")] def test_setitem_raises_type(): arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(TypeError, "int"): + with pytest.raises(TypeError, match="int"): arr[0] = 1 @@ -193,5 +193,5 @@ def test_setitem_raises_type(): def tet_sub_period(): arr = period_array(['2000', '2001'], freq='D') other = pd.Period("2000", freq="M") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + with pytest.raises(IncompatibleFrequency, match="freq"): arr - other diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index eef8646e4d6d2..52945edb14e58 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -694,12 +694,12 @@ def test_disallow_python_keywords(self): # GH 18221 df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class']) msg = "Python keyword not valid identifier in numexpr query" - with tm.assert_raises_regex(SyntaxError, msg): + with 
pytest.raises(SyntaxError, match=msg): df.query('class == 0') df = pd.DataFrame() df.index.name = 'lambda' - with tm.assert_raises_regex(SyntaxError, msg): + with pytest.raises(SyntaxError, match=msg): df.query('lambda == 0') @@ -1392,11 +1392,11 @@ def test_cannot_item_assign(self, invalid_target): msg = "Cannot assign expression output to target" expression = "a = 1 + 2" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=True) if hasattr(invalid_target, "copy"): - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=False) @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) @@ -1404,7 +1404,7 @@ def test_cannot_copy_item(self, invalid_target): msg = "Cannot return a copy of the target" expression = "a = 1 + 2" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=invalid_target, inplace=False) @pytest.mark.parametrize("target", [1, "cat", [1, 2], @@ -1415,7 +1415,7 @@ def test_inplace_no_assignment(self, target): assert self.eval(expression, target=target, inplace=False) == 3 msg = "Cannot operate inplace if there is no assignment" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.eval(expression, target=target, inplace=True) def test_basic_period_index_boolean_expression(self): @@ -1692,17 +1692,18 @@ def test_result_types2(self): def test_undefined_func(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assert_raises_regex( - ValueError, "\"mysin\" is not a supported function"): + msg = "\"mysin\" is not a supported function" + + with pytest.raises(ValueError, match=msg): df.eval("mysin(a)", engine=self.engine, parser=self.parser) def test_keyword_arg(self): df = DataFrame({'a': np.random.randn(10)}) - with tm.assert_raises_regex(TypeError, - 
"Function \"sin\" does not support " - "keyword arguments"): + msg = "Function \"sin\" does not support keyword arguments" + + with pytest.raises(TypeError, match=msg): df.eval("sin(x=a)", engine=self.engine, parser=self.parser) @@ -1763,16 +1764,16 @@ def test_no_new_globals(self, engine, parser): @td.skip_if_no_ne def test_invalid_engine(): - tm.assert_raises_regex(KeyError, 'Invalid engine \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - engine='asdf') + msg = 'Invalid engine \'asdf\' passed' + with pytest.raises(KeyError, match=msg): + pd.eval('x + y', local_dict={'x': 1, 'y': 2}, engine='asdf') @td.skip_if_no_ne def test_invalid_parser(): - tm.assert_raises_regex(KeyError, 'Invalid parser \'asdf\' passed', - pd.eval, 'x + y', local_dict={'x': 1, 'y': 2}, - parser='asdf') + msg = 'Invalid parser \'asdf\' passed' + with pytest.raises(KeyError, match=msg): + pd.eval('x + y', local_dict={'x': 1, 'y': 2}, parser='asdf') _parsers = {'python': PythonExprVisitor, 'pytables': pytables.ExprVisitor, @@ -1809,20 +1810,18 @@ def test_invalid_local_variable_reference(engine, parser): for _expr in exprs: if parser != 'pandas': - with tm.assert_raises_regex(SyntaxError, - "The '@' prefix is only"): + with pytest.raises(SyntaxError, match="The '@' prefix is only"): pd.eval(_expr, engine=engine, parser=parser) else: - with tm.assert_raises_regex(SyntaxError, - "The '@' prefix is not"): + with pytest.raises(SyntaxError, match="The '@' prefix is not"): pd.eval(_expr, engine=engine, parser=parser) def test_numexpr_builtin_raises(engine, parser): sin, dotted_line = 1, 2 if engine == 'numexpr': - with tm.assert_raises_regex(NumExprClobberingError, - 'Variables in expression .+'): + msg = 'Variables in expression .+' + with pytest.raises(NumExprClobberingError, match=msg): pd.eval('sin + dotted_line', engine=engine, parser=parser) else: res = pd.eval('sin + dotted_line', engine=engine, parser=parser) @@ -1831,21 +1830,20 @@ def test_numexpr_builtin_raises(engine, 
parser): def test_bad_resolver_raises(engine, parser): cannot_resolve = 42, 3.0 - with tm.assert_raises_regex(TypeError, 'Resolver of type .+'): + with pytest.raises(TypeError, match='Resolver of type .+'): pd.eval('1 + 2', resolvers=cannot_resolve, engine=engine, parser=parser) def test_empty_string_raises(engine, parser): # GH 13139 - with tm.assert_raises_regex(ValueError, - 'expr cannot be an empty string'): + with pytest.raises(ValueError, match="expr cannot be an empty string"): pd.eval('', engine=engine, parser=parser) def test_more_than_one_expression_raises(engine, parser): - with tm.assert_raises_regex(SyntaxError, - 'only a single expression is allowed'): + with pytest.raises(SyntaxError, match=("only a single expression " + "is allowed")): pd.eval('1 + 1; 2 + 2', engine=engine, parser=parser) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index b5353e34a2311..4dd55321dc71f 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -9,7 +9,6 @@ from pandas.core.sparse.api import SparseDtype import pandas.core.dtypes.common as com -import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -19,7 +18,7 @@ class TestPandasDtype(object): # Per issue GH15520 @pytest.mark.parametrize('box', [pd.Timestamp, 'pd.Timestamp', list]) def test_invalid_dtype_error(self, box): - with tm.assert_raises_regex(TypeError, 'not understood'): + with pytest.raises(TypeError, match='not understood'): com.pandas_dtype(box) @pytest.mark.parametrize('dtype', [ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 2927442f9b6ee..c70a549234a44 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -95,8 +95,8 @@ def test_construction_from_string(self): TypeError, lambda: CategoricalDtype.construct_from_string('foo')) def test_constructor_invalid(self): - with tm.assert_raises_regex(TypeError, - "CategoricalIndex.* must be 
called"): + msg = "CategoricalIndex.* must be called" + with pytest.raises(TypeError, match=msg): CategoricalDtype("category") def test_is_dtype(self): @@ -455,12 +455,12 @@ def test_construction_not_supported(self, subtype): # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalDtype') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) def test_construction_errors(self): msg = 'could not construct IntervalDtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype('xx') def test_construction_from_string(self): @@ -475,7 +475,7 @@ def test_construction_from_string_errors(self, string): # these are invalid entirely msg = 'a string needs to be passed, got type' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ @@ -484,7 +484,7 @@ def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = 'could not construct IntervalDtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) def test_subclass(self): @@ -698,10 +698,10 @@ def test_categorical_equality_strings(self, categories, ordered, other): assert result is expected def test_invalid_raises(self): - with tm.assert_raises_regex(TypeError, 'ordered'): + with pytest.raises(TypeError, match='ordered'): CategoricalDtype(['a', 'b'], ordered='foo') - with tm.assert_raises_regex(TypeError, 'collection'): + with pytest.raises(TypeError, match='collection'): CategoricalDtype('category') def test_mixed(self): @@ -782,7 +782,7 @@ def test_update_dtype_string(self, ordered): def test_update_dtype_errors(self, bad_dtype): dtype = CategoricalDtype(list('abc'), False) msg = 'a CategoricalDtype must be passed to perform an update, ' - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): dtype.update_dtype(bad_dtype) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e37efce901cbd..1ff3005722341 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -371,7 +371,7 @@ def test_maybe_convert_numeric_infinities(self): tm.assert_numpy_array_equal(out, pos) # too many characters - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): lib.maybe_convert_numeric( np.array(['foo_' + infinity], dtype=object), na_values, maybe_int) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 076be53a4a72f..3b966cd8d4774 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -2,7 +2,6 @@ import pandas as pd from pandas.core.internals import ExtensionBlock -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -43,7 +42,7 @@ def test_dataframe_from_series(self, data): def test_series_given_mismatched_index_raises(self, data): msg = 'Length of passed values is 3, index implies 5' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Series(data[:3], index=[0, 1, 2, 3, 4]) def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 00bb3b5d4eec2..dfc82c6041eae 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -2,7 +2,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -168,7 +167,7 @@ def test_take(self, data, na_value, na_cmp): assert result[0] == data[0] assert na_cmp(result[1], na_value) - with tm.assert_raises_regex(IndexError, "out of bounds"): + with pytest.raises(IndexError, match="out of bounds"): 
data.take([len(data) + 1]) def test_take_empty(self, data, na_value, na_cmp): @@ -180,7 +179,7 @@ def test_take_empty(self, data, na_value, na_cmp): with pytest.raises(IndexError): empty.take([-1]) - with tm.assert_raises_regex(IndexError, "cannot do a non-empty take"): + with pytest.raises(IndexError, match="cannot do a non-empty take"): empty.take([0, 1]) def test_take_negative(self, data): diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 60de5d4db03d9..e9a89c1af2f22 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -125,8 +125,8 @@ def test_fillna_copy_series(self, data_missing): assert ser._values is arr def test_fillna_length_mismatch(self, data_missing): - with (tm.assert_raises_regex(ValueError, - "Length of 'value' does not match.")): + msg = "Length of 'value' does not match." + with pytest.raises(ValueError, match=msg): data_missing.fillna(data_missing.take([1])) def test_combine_le(self, data_repeated): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 72316b5b7eb91..3d798b2af5c43 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -4,7 +4,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from .base import BaseExtensionTests @@ -34,12 +33,12 @@ def test_setitem_sequence_mismatched_length_raises(self, data, as_array): value = data._from_sequence(value) xpr = 'cannot set using a {} indexer with a different length' - with tm.assert_raises_regex(ValueError, xpr.format('list-like')): + with pytest.raises(ValueError, match=xpr.format('list-like')): ser[[0, 1]] = value # Ensure no modifications made before the exception self.assert_series_equal(ser, original) - with tm.assert_raises_regex(ValueError, xpr.format('slice')): + with pytest.raises(ValueError, match=xpr.format('slice')): ser[slice(3)] = value self.assert_series_equal(ser, original) @@ 
-164,7 +163,7 @@ def test_setitem_expand_with_extension(self, data): def test_setitem_frame_invalid_length(self, data): df = pd.DataFrame({"A": [1] * len(data)}) xpr = "Length of values does not match length of index" - with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): df['B'] = data[:5] @pytest.mark.xfail(reason="GH#20441: setitem on extension types.", diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index af5f6bf0a2f65..01efd7ec7e590 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -207,7 +207,7 @@ class TestSetitem(BaseDecimal, base.BaseSetitemTests): def test_series_constructor_coerce_data_to_extension_dtype_raises(): xpr = ("Cannot cast data to extension dtype 'decimal'. Pass the " "extension array directly.") - with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): pd.Series([0, 1, 2], dtype=DecimalDtype()) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b7c61496f0bf0..a9fb22bb72497 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -272,7 +272,7 @@ def test_error(self, data, all_arithmetic_operators): def test_add_series_with_extension_array(self, data): ser = pd.Series(data) - with tm.assert_raises_regex(TypeError, "unsupported"): + with pytest.raises(TypeError, match="unsupported"): ser + data def _check_divmod_op(self, s, op, other, exc=NotImplementedError): diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index b1d08a5620bf3..7fd389e19325c 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -22,7 +22,6 @@ from pandas import Categorical from pandas.api.types import CategoricalDtype from pandas.tests.extension import base -import 
pandas.util.testing as tm def make_data(): @@ -213,7 +212,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): def test_add_series_with_extension_array(self, data): ser = pd.Series(data) - with tm.assert_raises_regex(TypeError, "cannot perform"): + with pytest.raises(TypeError, match="cannot perform"): ser + data def _check_divmod_op(self, s, op, other, exc=NotImplementedError): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 2c7bc79c324b4..d67c0d0a9c05a 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -21,7 +21,6 @@ from pandas import Interval from pandas.core.arrays import IntervalArray from pandas.tests.extension import base -import pandas.util.testing as tm def make_data(): @@ -137,7 +136,7 @@ def test_fillna_series(self): def test_non_scalar_raises(self, data_missing): msg = "Got a 'list' instead." - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): data_missing.fillna([1, 1]) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 3de3f1dfd9dbc..2e629ccb2981e 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -8,7 +8,6 @@ import pandas as pd from pandas.core.arrays import PeriodArray from pandas.tests.extension import base -import pandas.util.testing as tm @pytest.fixture @@ -114,7 +113,7 @@ def test_add_series_with_extension_array(self, data): s = pd.Series(data) msg = (r"unsupported operand type\(s\) for \+: " r"\'PeriodArray\' and \'PeriodArray\'") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): s + data def test_error(self): diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 0752c125b75eb..2b4d1e6f25c65 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -29,7 +29,7 @@ 
def test_set_index_directly(self, float_string_frame): df.index = idx tm.assert_index_equal(df.index, idx) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): df.index = idx[::2] def test_set_index(self, float_string_frame): @@ -38,7 +38,7 @@ def test_set_index(self, float_string_frame): df = df.set_index(idx) tm.assert_index_equal(df.index, idx) - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): df.set_index(idx[::2]) def test_set_index_cast(self): @@ -134,7 +134,7 @@ def test_set_index_pass_single_array(self, frame_of_index_cols, if box == list: # list of strings gets interpreted as list of keys msg = "['one', 'two', 'three', 'one', 'two']" - with tm.assert_raises_regex(KeyError, msg): + with pytest.raises(KeyError, match=msg): df.set_index(key, drop=drop, append=append) else: # np.array/tuple/iter/list-of-list "forget" the name of B @@ -232,12 +232,10 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, def test_set_index_verify_integrity(self, frame_of_index_cols): df = frame_of_index_cols - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index('A', verify_integrity=True) # with MultiIndex - with tm.assert_raises_regex(ValueError, - 'Index has duplicate keys'): + with pytest.raises(ValueError, match='Index has duplicate keys'): df.set_index([df['A'], df['A']], verify_integrity=True) @pytest.mark.parametrize('append', [True, False]) @@ -245,21 +243,21 @@ def test_set_index_verify_integrity(self, frame_of_index_cols): def test_set_index_raise(self, frame_of_index_cols, drop, append): df = frame_of_index_cols - with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"): + with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): # column names are A-E, as well as one tuple df.set_index(['foo', 'bar', 'baz'], 
drop=drop, append=append) # non-existent key in list with arrays - with tm.assert_raises_regex(KeyError, 'X'): + with pytest.raises(KeyError, match='X'): df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append) msg = 'The parameter "keys" may only contain a combination of.*' # forbidden type, e.g. set - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df.set_index(set(df['A']), drop=drop, append=append) # forbidden type in list, e.g. set - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df.set_index(['A', df['A'], set(df['A'])], drop=drop, append=append) @@ -427,7 +425,7 @@ def test_set_index_empty_column(self): def test_set_columns(self, float_string_frame): cols = Index(np.arange(len(float_string_frame.columns))) float_string_frame.columns = cols - with tm.assert_raises_regex(ValueError, 'Length mismatch'): + with pytest.raises(ValueError, match='Length mismatch'): float_string_frame.columns = cols[::2] def test_dti_set_index_reindex(self): @@ -575,13 +573,13 @@ def test_rename_axis_mapper(self): assert result.columns.name == 'meh' # Test different error cases - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): df.rename_axis(index='wrong') - with tm.assert_raises_regex(ValueError, 'Length of names'): + with pytest.raises(ValueError, match='Length of names'): df.rename_axis(index=['wrong']) - with tm.assert_raises_regex(TypeError, 'bogus'): + with pytest.raises(TypeError, match='bogus'): df.rename_axis(bogus=None) def test_rename_multiindex(self): @@ -858,9 +856,9 @@ def test_reset_index_level(self): # Missing levels - for both MultiIndex and single-level Index: for idx_lev in ['A', 'B'], ['A']: - with tm.assert_raises_regex(KeyError, 'Level E '): + with pytest.raises(KeyError, match='Level E '): df.set_index(idx_lev).reset_index(level=['A', 'E']) - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with 
pytest.raises(IndexError, match='Too many levels'): df.set_index(idx_lev).reset_index(level=[0, 1, 2]) def test_reset_index_right_dtype(self): @@ -1054,35 +1052,35 @@ def test_rename_positional_named(self): tm.assert_frame_equal(result, expected) def test_rename_axis_style_raises(self): - # https://github.com/pandas-dev/pandas/issues/12392 - df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) # Named target and axis - with tm.assert_raises_regex(TypeError, None): + over_spec_msg = ("Cannot specify both 'axis' and " + "any of 'index' or 'columns'") + with pytest.raises(TypeError, match=over_spec_msg): df.rename(index=str.lower, axis=1) - with tm.assert_raises_regex(TypeError, None): - df.rename(index=str.lower, axis='columns') - - with tm.assert_raises_regex(TypeError, None): - df.rename(index=str.lower, axis='columns') + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") - with tm.assert_raises_regex(TypeError, None): - df.rename(columns=str.lower, axis='columns') + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") - with tm.assert_raises_regex(TypeError, None): + with pytest.raises(TypeError, match=over_spec_msg): df.rename(index=str.lower, axis=0) # Multiple targets and axis - with tm.assert_raises_regex(TypeError, None): - df.rename(str.lower, str.lower, axis='columns') + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, str.lower, axis="columns") # Too many targets - with tm.assert_raises_regex(TypeError, None): + over_spec_msg = "Cannot specify all of 'mapper', 'index', 'columns'." 
+ with pytest.raises(TypeError, match=over_spec_msg): df.rename(str.lower, str.lower, str.lower) # Duplicates - with tm.assert_raises_regex(TypeError, "multiple values"): + with pytest.raises(TypeError, match="multiple values"): df.rename(id, mapper=id) def test_reindex_api_equivalence(self): @@ -1279,7 +1277,7 @@ def test_set_axis_inplace(self): # wrong values for the "axis" parameter for axis in 3, 'foo': - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): df.set_axis(list('abc'), axis=axis, inplace=False) def test_set_axis_prior_to_deprecation_signature(self): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index ab4eaf02f38dd..c9481fef4aa36 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -101,7 +101,8 @@ def wrapper(x): assert lcd_dtype == result1.dtype # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + with pytest.raises(ValueError, match='No axis named 2'): + f(axis=2) # all NA case if has_skipna: @@ -189,7 +190,8 @@ def wrapper(x): check_dtype=False) # bad axis - tm.assert_raises_regex(ValueError, 'No axis named 2', f, axis=2) + with pytest.raises(ValueError, match='No axis named 2'): + f(axis=2) # all NA case if has_skipna: @@ -343,7 +345,7 @@ def test_corr_invalid_method(self): df = pd.DataFrame(np.random.normal(size=(10, 2))) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.corr(method="____") def test_cov(self, float_frame, float_string_frame): @@ -1469,7 +1471,7 @@ def test_any_all_level_axis_none_raises(self, method): names=['out', 'in']) ) xpr = "Must specify 'axis' when aggregating by level." 
- with tm.assert_raises_regex(ValueError, xpr): + with pytest.raises(ValueError, match=xpr): getattr(df, method)(axis=None, level='out') # ---------------------------------------------------------------------- @@ -1757,7 +1759,7 @@ def test_numpy_round(self): tm.assert_frame_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): np.round(df, decimals=0, out=df) def test_round_mixed_type(self): @@ -1997,8 +1999,7 @@ def test_dot(self): expected = a.dot(a.iloc[0]) tm.assert_series_equal(result, expected) - with tm.assert_raises_regex(ValueError, - 'Dot product shape mismatch'): + with pytest.raises(ValueError, match='Dot product shape mismatch'): a.dot(row[:-1]) a = np.random.rand(1, 5) @@ -2015,7 +2016,7 @@ def test_dot(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, 'aligned'): + with pytest.raises(ValueError, match='aligned'): df.dot(df2) @pytest.mark.skipif(not PY35, @@ -2075,7 +2076,7 @@ def test_matmul(self): df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, 'aligned'): + with pytest.raises(ValueError, match='aligned'): operator.matmul(df, df2) @@ -2144,7 +2145,7 @@ def test_n(self, df_strings, nselect_method, n, order): error_msg = self.dtype_error_msg_template.format( column='b', method=nselect_method, dtype='object') - with tm.assert_raises_regex(TypeError, error_msg): + with pytest.raises(TypeError, match=error_msg): getattr(df, nselect_method)(n, order) else: ascending = nselect_method == 'nsmallest' @@ -2162,7 +2163,7 @@ def test_n_error(self, df_main_dtypes, nselect_method, columns): # escape some characters that may be in the repr error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)") 
.replace("[", "\\[").replace("]", "\\]")) - with tm.assert_raises_regex(TypeError, error_msg): + with pytest.raises(TypeError, match=error_msg): getattr(df, nselect_method)(2, columns) def test_n_all_dtypes(self, df_main_dtypes): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index d6d932d235eec..295a603850984 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -107,14 +107,17 @@ def test_get_axis(self, float_frame): assert f._get_axis(0) is f.index assert f._get_axis(1) is f.columns - tm.assert_raises_regex( - ValueError, 'No axis named', f._get_axis_number, 2) - tm.assert_raises_regex( - ValueError, 'No axis.*foo', f._get_axis_name, 'foo') - tm.assert_raises_regex( - ValueError, 'No axis.*None', f._get_axis_name, None) - tm.assert_raises_regex(ValueError, 'No axis named', - f._get_axis_number, None) + with pytest.raises(ValueError, match='No axis named'): + f._get_axis_number(2) + + with pytest.raises(ValueError, match='No axis.*foo'): + f._get_axis_name('foo') + + with pytest.raises(ValueError, match='No axis.*None'): + f._get_axis_name(None) + + with pytest.raises(ValueError, match='No axis named'): + f._get_axis_number(None) def test_keys(self, float_frame): getkeys = float_frame.keys diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index b71af4b777022..3cdb223a813b7 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -173,7 +173,7 @@ def _check_unaligned_frame(meth, op, df, other): # NAs msg = "Unable to coerce to Series/DataFrame" tm.assert_frame_equal(f(np.nan), o(df, np.nan)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): f(ndim_5) # Series @@ -382,7 +382,7 @@ def test_arith_flex_frame_raise(self, all_arithmetic_operators, for dim in range(3, 6): arr = np.ones((1,) * dim) msg = "Unable to coerce to Series/DataFrame" - with tm.assert_raises_regex(ValueError, msg): 
+ with pytest.raises(ValueError, match=msg): getattr(float_frame, op)(arr) def test_arith_flex_frame_corner(self, float_frame): @@ -397,10 +397,10 @@ def test_arith_flex_frame_corner(self, float_frame): result = float_frame[:0].add(float_frame) tm.assert_frame_equal(result, float_frame * np.nan) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], fill_value=3) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): float_frame.add(float_frame.iloc[0], axis='index', fill_value=3) def test_arith_flex_series(self, simple_frame): @@ -441,10 +441,10 @@ def test_arith_flex_zero_len_raises(self): df_len0 = pd.DataFrame([], columns=['A', 'B']) df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): df.add(ser_len0, fill_value='E') - with tm.assert_raises_regex(NotImplementedError, 'fill_value'): + with pytest.raises(NotImplementedError, match='fill_value'): df_len0.sub(df['A'], axis=None, fill_value=3) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 6186ce4d45ef2..de6ac251d117b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -476,36 +476,36 @@ def test_reindex_positional_warns(self): def test_reindex_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex([0, 1], ['A'], axis=1) - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 
'axis'"): df.reindex([0, 1], ['A'], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(columns=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], columns=[0, 1], axis='columns') - with tm.assert_raises_regex(TypeError, 'Cannot specify all'): + with pytest.raises(TypeError, match='Cannot specify all'): df.reindex([0, 1], [0], ['A']) # Mixing styles - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='index') - with tm.assert_raises_regex(TypeError, "Cannot specify both 'axis'"): + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): df.reindex(index=[0, 1], axis='columns') # Duplicates - with tm.assert_raises_regex(TypeError, "multiple values"): + with pytest.raises(TypeError, match="multiple values"): df.reindex([0, 1], labels=[0, 1]) def test_reindex_single_named_indexer(self): @@ -820,23 +820,23 @@ def test_filter(self): tm.assert_frame_equal(filtered, expected) # pass in None - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): self.frame.filter() - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, match='Must pass'): self.frame.filter(items=None) - with tm.assert_raises_regex(TypeError, 'Must pass'): + with pytest.raises(TypeError, 
match='Must pass'): self.frame.filter(axis=1) # test mutually exclusive arguments - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', like='bbi') - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$', axis=1) - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], regex='e$') - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi', axis=0) - with tm.assert_raises_regex(TypeError, 'mutually exclusive'): + with pytest.raises(TypeError, match='mutually exclusive'): self.frame.filter(items=['one', 'three'], like='bbi') # objects @@ -1160,5 +1160,5 @@ def test_drop_empty_list(self, index, drop_labels): @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) def test_drop_non_empty_list(self, index, drop_labels): # GH 21494 - with tm.assert_raises_regex(KeyError, 'not found in axis'): + with pytest.raises(KeyError, match='not found in axis'): pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 8a7d7d790a1b4..224e56777f6b4 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -474,7 +474,7 @@ def test_convert_objects(self, float_string_frame): # via astype, but errors converted = float_string_frame.copy() - with tm.assert_raises_regex(ValueError, 'invalid literal'): + with pytest.raises(ValueError, match='invalid literal'): converted['H'].astype('int32') # mixed in a single column diff --git a/pandas/tests/frame/test_combine_concat.py 
b/pandas/tests/frame/test_combine_concat.py index 3b8d6e6c55ed1..22c5d146e1a06 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -127,13 +127,13 @@ def test_append_series_dict(self): columns=['foo', 'bar', 'baz', 'qux']) series = df.loc[4] - with tm.assert_raises_regex(ValueError, - 'Indexes have overlapping values'): + msg = 'Indexes have overlapping values' + with pytest.raises(ValueError, match=msg): df.append(series, verify_integrity=True) + series.name = None - with tm.assert_raises_regex(TypeError, - 'Can only append a Series if ' - 'ignore_index=True'): + msg = 'Can only append a Series if ignore_index=True' + with pytest.raises(TypeError, match=msg): df.append(series, verify_integrity=True) result = df.append(series[::-1], ignore_index=True) @@ -321,7 +321,7 @@ def test_update_raise(self): other = DataFrame([[2., nan], [nan, 7]], index=[1, 3], columns=[1, 2]) - with tm.assert_raises_regex(ValueError, "Data overlaps"): + with pytest.raises(ValueError, match="Data overlaps"): df.update(other, raise_conflict=True) def test_update_from_non_df(self): @@ -470,7 +470,7 @@ def test_concat_axis_parameter(self): assert_frame_equal(concatted_1_series, expected_columns_series) # Testing ValueError - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): pd.concat([series1, series2], axis='something') def test_concat_numerical_names(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 442ce27a730a6..c71d5d9f977f6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -292,10 +292,10 @@ def test_constructor_dict(self): # GH10856 # dict with scalar values should raise error, even if columns passed msg = 'If using all scalar values, you must pass an index' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): 
DataFrame({'a': 0.7}) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame({'a': 0.7}, columns=['a']) @pytest.mark.parametrize("scalar", [2, np.nan, None, 'D']) @@ -377,40 +377,43 @@ def test_constructor_multi_index(self): def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." # passing an empty array with columns specified. - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(np.empty(0), columns=list('abc')) msg = "Mixing dicts with non-Series may lead to ambiguous ordering." # mix dict and array, wrong size - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame({'A': {'a': 'a', 'b': 'b'}, 'B': ['a', 'b', 'c']}) # wrong size ndarray, GH 3105 msg = r"Shape of passed values is \(3, 4\), indices imply \(3, 3\)" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(np.arange(12).reshape((4, 3)), columns=['foo', 'bar', 'baz'], index=pd.date_range('2000-01-01', periods=3)) # higher dim raise exception - with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): + with pytest.raises(ValueError, match='Must pass 2-d input'): DataFrame(np.zeros((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) # wrong size axis labels - with tm.assert_raises_regex(ValueError, "Shape of passed values " - r"is \(3, 2\), indices " - r"imply \(3, 1\)"): + msg = ("Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(3, 1\)") + with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B', 'C'], index=[1]) - with tm.assert_raises_regex(ValueError, "Shape of passed values " - r"is \(3, 2\), indices " - r"imply \(2, 2\)"): + msg = ("Shape of passed values " + r"is \(3, 2\), indices " + r"imply \(2, 2\)") + with pytest.raises(ValueError, match=msg): DataFrame(np.random.rand(2, 3), columns=['A', 'B'], index=[1, 2]) - with 
tm.assert_raises_regex(ValueError, "If using all scalar " - "values, you must pass " - "an index"): + msg = ("If using all scalar " + "values, you must pass " + "an index") + with pytest.raises(ValueError, match=msg): DataFrame({'a': False, 'b': True}) def test_constructor_with_embedded_frames(self): @@ -637,14 +640,14 @@ def _check_basic_constructor(self, empty): # wrong size axis labels msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B', 'C'], index=[1]) msg = r'Shape of passed values is \(3, 2\), indices imply \(2, 2\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame(mat, columns=['A', 'B'], index=[1, 2]) # higher dim raise exception - with tm.assert_raises_regex(ValueError, 'Must pass 2-d input'): + with pytest.raises(ValueError, match='Must pass 2-d input'): DataFrame(empty((3, 3, 3)), columns=['A', 'B', 'C'], index=[1]) @@ -829,7 +832,7 @@ def test_constructor_arrays_and_scalars(self): exp = DataFrame({'a': df['a'].values, 'b': [True] * 10}) tm.assert_frame_equal(df, exp) - with tm.assert_raises_regex(ValueError, 'must pass an index'): + with pytest.raises(ValueError, match='must pass an index'): DataFrame({'a': False, 'b': True}) def test_constructor_DataFrame(self): @@ -862,7 +865,7 @@ def test_constructor_more(self): # can't cast mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1) - with tm.assert_raises_regex(ValueError, 'cast'): + with pytest.raises(ValueError, match='cast'): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(self.frame._series)) @@ -1108,8 +1111,7 @@ class CustomDict(dict): def test_constructor_ragged(self): data = {'A': randn(10), 'B': randn(8)} - with tm.assert_raises_regex(ValueError, - 'arrays must all be same length'): + with pytest.raises(ValueError, match='arrays must all be same length'): 
DataFrame(data) def test_constructor_scalar(self): @@ -1131,7 +1133,7 @@ def test_constructor_mixed_dict_and_Series(self): assert result.index.is_monotonic # ordering ambiguous, raise exception - with tm.assert_raises_regex(ValueError, 'ambiguous ordering'): + with pytest.raises(ValueError, match='ambiguous ordering'): DataFrame({'A': ['a', 'b'], 'B': {'a': 'a', 'b': 'b'}}) # this is OK though @@ -1185,10 +1187,10 @@ def test_from_dict_columns_parameter(self): tm.assert_frame_equal(result, expected) msg = "cannot use columns parameter with orient='columns'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), orient='columns', columns=['one', 'two']) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), columns=['one', 'two']) @@ -1299,9 +1301,8 @@ def test_constructor_from_items(self): tm.assert_frame_equal(recons, self.mixed_frame) assert recons['A'].dtype == np.float64 - with tm.assert_raises_regex(TypeError, - "Must pass columns with " - "orient='index'"): + msg = "Must pass columns with orient='index'" + with pytest.raises(TypeError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items(row_items, orient='index') @@ -1331,16 +1332,16 @@ def test_constructor_from_items(self): def test_constructor_from_items_scalars(self): # GH 17312 - with tm.assert_raises_regex(ValueError, - r'The value in each \(key, value\) ' - 'pair must be an array, Series, or dict'): + msg = (r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict') + with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items([('A', 1), ('B', 4)]) - with tm.assert_raises_regex(ValueError, - r'The value in each \(key, value\) ' - 'pair must be an array, Series, or 
dict'): + msg = (r'The value in each \(key, value\) ' + 'pair must be an array, Series, or dict') + with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], @@ -1363,8 +1364,8 @@ def test_constructor_mix_series_nonseries(self): 'B': list(self.frame['B'])}, columns=['A', 'B']) tm.assert_frame_equal(df, self.frame.loc[:, ['A', 'B']]) - with tm.assert_raises_regex(ValueError, 'does not match ' - 'index length'): + msg = 'does not match index length' + with pytest.raises(ValueError, match=msg): DataFrame({'A': self.frame['A'], 'B': list(self.frame['B'])[:-2]}) def test_constructor_miscast_na_int_dtype(self): @@ -1419,8 +1420,9 @@ def test_constructor_single_value(self): pytest.raises(ValueError, DataFrame, 'a', [1, 2]) pytest.raises(ValueError, DataFrame, 'a', columns=['a', 'c']) - with tm.assert_raises_regex(TypeError, 'incompatible data ' - 'and dtype'): + + msg = 'incompatible data and dtype' + with pytest.raises(TypeError, match=msg): DataFrame('a', [1, 2], ['a', 'c'], float) def test_constructor_with_datetimes(self): @@ -1783,7 +1785,7 @@ def test_from_records_to_records(self): # wrong length msg = r'Shape of passed values is \(3, 2\), indices imply \(3, 1\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): DataFrame.from_records(arr, index=index[:-1]) indexed_frame = DataFrame.from_records(arr, index='f1') diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2dbf3e9784749..2ad6da084e451 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -329,9 +329,8 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): def test_select_dtypes_empty(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) - with tm.assert_raises_regex(ValueError, 'at least one of ' - 'include or exclude ' - 'must be nonempty'): + msg = 'at 
least one of include or exclude must be nonempty' + with pytest.raises(ValueError, match=msg): df.select_dtypes() def test_select_dtypes_bad_datetime64(self): @@ -341,10 +340,10 @@ def test_select_dtypes_bad_datetime64(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assert_raises_regex(ValueError, '.+ is too specific'): + with pytest.raises(ValueError, match='.+ is too specific'): df.select_dtypes(include=['datetime64[D]']) - with tm.assert_raises_regex(ValueError, '.+ is too specific'): + with pytest.raises(ValueError, match='.+ is too specific'): df.select_dtypes(exclude=['datetime64[as]']) def test_select_dtypes_datetime_with_tz(self): @@ -373,7 +372,7 @@ def test_select_dtypes_str_raises(self, dtype, arg): msg = "string dtypes are not allowed" kwargs = {arg: [dtype]} - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df.select_dtypes(**kwargs) def test_select_dtypes_bad_arg_raises(self): @@ -384,8 +383,9 @@ def test_select_dtypes_bad_arg_raises(self): 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('now', periods=3).values}) - with tm.assert_raises_regex(TypeError, 'data type.' 
- '*not understood'): + + msg = 'data type.*not understood' + with pytest.raises(TypeError, match=msg): df.select_dtypes(['blargy, blarg, blarg']) def test_select_dtypes_typecodes(self): @@ -514,7 +514,7 @@ def test_astype_cast_nan_inf_int(self, val, dtype): msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" df = DataFrame([val]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.astype(dtype) def test_astype_str(self, text_dtype): @@ -661,10 +661,10 @@ def test_astype_categorical(self, dtype): def test_astype_categoricaldtype_class_raises(self, cls): df = DataFrame({"A": ['a', 'a', 'b', 'c']}) xpr = "Expected an instance of {}".format(cls.__name__) - with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): df.astype({"A": cls}) - with tm.assert_raises_regex(TypeError, xpr): + with pytest.raises(TypeError, match=xpr): df['A'].astype(cls) @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16']) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 2467b2a89472b..b0e7fe2e25a6c 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -54,7 +54,7 @@ def test_getitem(self): assert self.frame[key] is not None assert 'random' not in self.frame - with tm.assert_raises_regex(KeyError, 'random'): + with pytest.raises(KeyError, match='random'): self.frame['random'] df = self.frame.copy() @@ -129,7 +129,7 @@ def test_getitem_listlike(self, idx_type, levels): assert_frame_equal(result, expected) idx = idx_type(keys + [missing]) - with tm.assert_raises_regex(KeyError, 'not in index'): + with pytest.raises(KeyError, match='not in index'): frame[idx] def test_getitem_callable(self): @@ -153,13 +153,12 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A'], check_names=False) assert_series_equal(self.frame['A'], data['B'], check_names=False) - with tm.assert_raises_regex(ValueError, - 
'Columns must be same length as key'): + msg = 'Columns must be same length as key' + with pytest.raises(ValueError, match=msg): data[['A']] = self.frame[['A', 'B']] - with tm.assert_raises_regex(ValueError, 'Length of values ' - 'does not match ' - 'length of index'): + msg = 'Length of values does not match length of index' + with pytest.raises(ValueError, match=msg): data['A'] = range(len(data.index) - 1) df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) @@ -242,13 +241,13 @@ def test_getitem_boolean(self): subframe = self.tsframe[indexer] tm.assert_index_equal(subindex, subframe.index) - with tm.assert_raises_regex(ValueError, 'Item wrong length'): + with pytest.raises(ValueError, match='Item wrong length'): self.tsframe[indexer[:-1]] subframe_obj = self.tsframe[indexer_obj] assert_frame_equal(subframe_obj, subframe) - with tm.assert_raises_regex(ValueError, 'boolean values only'): + with pytest.raises(ValueError, match='boolean values only'): self.tsframe[self.tsframe] # test that Series work @@ -545,7 +544,7 @@ def test_setitem_boolean(self): assert_almost_equal(df.values, values) msg = "Must pass DataFrame or 2-d ndarray with boolean values only" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df[df * 0] = 2 # index with DataFrame @@ -1466,7 +1465,7 @@ def test_getitem_fancy_ints(self): def test_getitem_setitem_fancy_exceptions(self): ix = self.frame.iloc - with tm.assert_raises_regex(IndexingError, 'Too many indexers'): + with pytest.raises(IndexingError, match='Too many indexers'): ix[:, :, :] with pytest.raises(IndexingError): @@ -1803,7 +1802,7 @@ def testit(df): with pytest.raises(KeyError): self.frame.lookup([self.frame.index[0]], ['xyz']) - with tm.assert_raises_regex(ValueError, 'same size'): + with pytest.raises(ValueError, match='same size'): self.frame.lookup(['a', 'b', 'c'], ['a']) def test_set_value(self): @@ -2513,7 +2512,7 @@ def test_boolean_indexing(self): df1[df1 > 2.0 * df2] = -1 
assert_frame_equal(df1, expected) - with tm.assert_raises_regex(ValueError, 'Item wrong length'): + with pytest.raises(ValueError, match='Item wrong length'): df1[df1.index[:-1] > 2] = -1 def test_boolean_indexing_mixed(self): @@ -2547,7 +2546,7 @@ def test_boolean_indexing_mixed(self): msg = ("boolean setting on mixed-type|" "not supported between|" "unorderable types") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): # TODO: This message should be the same in PY2/PY3 df[df > 0.3] = 1 @@ -2733,7 +2732,7 @@ def test_where_invalid_input_single(self, cond): df = DataFrame({"a": [1, 2, 3]}) msg = "Boolean array expected for the condition" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.where(cond) @pytest.mark.parametrize("cond", [ @@ -2751,7 +2750,7 @@ def test_where_invalid_input_multiple(self, cond): df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]}) msg = "Boolean array expected for the condition" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.where(cond) def test_where_dataframe_col_match(self): @@ -2773,7 +2772,7 @@ def test_where_ndframe_align(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) cond = [True] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.where(cond) expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) @@ -2782,7 +2781,7 @@ def test_where_ndframe_align(self): tm.assert_frame_equal(out, expected) cond = np.array([False, True, False, True]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.where(cond) expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) @@ -2872,9 +2871,9 @@ def test_where_none(self): # GH 7656 df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, { 'A': np.nan, 'B': 'Test', 'C': np.nan}]) - expected = df.where(~isna(df), None) - with tm.assert_raises_regex(TypeError, 'boolean setting ' - 'on 
mixed-type'): + msg = 'boolean setting on mixed-type' + + with pytest.raises(TypeError, match=msg): df.where(~isna(df), None, inplace=True) def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): @@ -3162,7 +3161,7 @@ def test_type_error_multiindex(self): dg = df.pivot_table(index='i', columns='c', values=['x', 'y']) - with tm.assert_raises_regex(TypeError, "is an invalid key"): + with pytest.raises(TypeError, match="is an invalid key"): str(dg[:, 0]) index = Index(range(2), name='i') diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index ccdba6df2521a..1c7f3ed834289 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -94,13 +94,13 @@ def test_join_index(frame): tm.assert_index_equal(joined.index, frame.index.sort_values()) tm.assert_index_equal(joined.columns, expected_columns) - tm.assert_raises_regex( - ValueError, 'join method', f.join, f2, how='foo') + with pytest.raises(ValueError, match='join method'): + f.join(f2, how='foo') # corner case - overlapping columns + msg = 'columns overlap but no suffix' for how in ('outer', 'left', 'inner'): - with tm.assert_raises_regex(ValueError, 'columns overlap but ' - 'no suffix'): + with pytest.raises(ValueError, match=msg): frame.join(frame, how=how) @@ -131,7 +131,8 @@ def test_join_index_series(frame): tm.assert_frame_equal(joined, frame, check_names=False) s.name = None - tm.assert_raises_regex(ValueError, 'must have a name', df.join, s) + with pytest.raises(ValueError, match='must have a name'): + df.join(s) def test_join_overlap(frame): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 9d1bd9e9a0234..200e134838949 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -330,8 +330,8 @@ def test_na_actions_categorical(self): res = df.fillna(value={"cats": 3, "vals": "b"}) tm.assert_frame_equal(res, df_exp_fill) - with tm.assert_raises_regex(ValueError, "fill value 
must be " - "in categories"): + with pytest.raises(ValueError, match=("fill value must " + "be in categories")): df.fillna(value={"cats": 4, "vals": "c"}) res = df.fillna(method='pad') @@ -555,8 +555,7 @@ def test_fillna_dict_series(self): assert_frame_equal(result, expected) # disable this for now - with tm.assert_raises_regex(NotImplementedError, - 'column by column'): + with pytest.raises(NotImplementedError, match='column by column'): df.fillna(df.max(1), axis=1) def test_fillna_dataframe(self): @@ -596,7 +595,7 @@ def test_fillna_columns(self): assert_frame_equal(result, expected) def test_fillna_invalid_method(self): - with tm.assert_raises_regex(ValueError, 'ffil'): + with pytest.raises(ValueError, match='ffil'): self.frame.fillna(method='ffil') def test_fillna_invalid_value(self): @@ -820,11 +819,10 @@ def test_interp_raise_on_all_object_dtype(self): 'A': [1, 2, 3], 'B': [4, 5, 6]}, dtype='object') - with tm.assert_raises_regex( - TypeError, - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype."): + msg = ("Cannot interpolate with all object-dtype columns " + "in the DataFrame. 
Try setting at least one " + "column to a numeric dtype.") + with pytest.raises(TypeError, match=msg): df.interpolate() def test_interp_inplace(self): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 102cc52aa46cb..03ca3941f6031 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -126,7 +126,7 @@ def test_insert_error_msmgs(self): s = DataFrame({'foo': ['a', 'b', 'c', 'a'], 'fiz': [ 'g', 'h', 'i', 'j']}).set_index('foo') msg = 'cannot reindex from a duplicate axis' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df['newcol'] = s # GH 4107, more descriptive error message @@ -134,7 +134,7 @@ def test_insert_error_msmgs(self): columns=['a', 'b', 'c', 'd']) msg = 'incompatible index of inserted column with frame index' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): df['gr'] = df.groupby(['b', 'c']).count() def test_insert_benchmark(self): @@ -178,7 +178,7 @@ def test_insert(self): result = Series(dict(float32=2, float64=4, int32=1)) assert (df.get_dtype_counts().sort_index() == result).all() - with tm.assert_raises_regex(ValueError, 'already exists'): + with pytest.raises(ValueError, match='already exists'): df.insert(1, 'a', df['b']) pytest.raises(ValueError, df.insert, 1, 'c', df['b']) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 0b32ec89d3909..df88bee3b35bf 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -51,7 +51,7 @@ def check(result, expected=None): [2, 1, 3, 5, 'bah']], columns=['foo', 'bar', 'foo', 'hello', 'string']) check(df, expected) - with tm.assert_raises_regex(ValueError, 'Length of value'): + with pytest.raises(ValueError, match='Length of value'): df.insert(0, 'AnotherColumn', range(len(df.index) - 1)) # insert same dtype @@ 
-101,8 +101,9 @@ def check(result, expected=None): check(df, expected) # insert a dup - tm.assert_raises_regex(ValueError, 'cannot insert', - df.insert, 2, 'new_col', 4.) + with pytest.raises(ValueError, match='cannot insert'): + df.insert(2, 'new_col', 4.) + df.insert(2, 'new_col', 4., allow_duplicates=True) expected = DataFrame([[1, 1, 4., 5., 'bah', 3], [1, 2, 4., 5., 'bah', 3], diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 65459735e639b..89d45639f3e03 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -496,8 +496,7 @@ def test_comp(func): tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) - with tm.assert_raises_regex(ValueError, - 'dim must be <= 2'): + with pytest.raises(ValueError, match='dim must be <= 2'): func(df1, ndim_5) result2 = func(self.simple, row) @@ -508,9 +507,8 @@ def test_comp(func): tm.assert_numpy_array_equal(result3.values, func(self.frame.values, 0)) - with tm.assert_raises_regex(ValueError, - 'Can only compare identically' - '-labeled DataFrame'): + msg = 'Can only compare identically-labeled DataFrame' + with pytest.raises(ValueError, match=msg): func(self.simple, self.simple[:2]) test_comp(operator.eq) @@ -551,11 +549,11 @@ def test_boolean_comparison(self): msg1d = 'Unable to coerce to Series, length must be 2: given 3' msg2d = 'Unable to coerce to DataFrame, shape must be' msg2db = 'operands could not be broadcast together with shapes' - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): # wrong shape df > lst - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): # wrong shape result = df > tup @@ -566,10 +564,10 @@ def test_boolean_comparison(self): result = df.values > b_r assert_numpy_array_equal(result, expected.values) - with tm.assert_raises_regex(ValueError, msg2d): + with pytest.raises(ValueError, match=msg2d): df > b_c - with 
tm.assert_raises_regex(ValueError, msg2db): + with pytest.raises(ValueError, match=msg2db): df.values > b_c # == @@ -577,10 +575,10 @@ def test_boolean_comparison(self): result = df == b assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): result = df == lst - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): result = df == tup # broadcasts like ndarray (GH#23000) @@ -590,7 +588,7 @@ def test_boolean_comparison(self): result = df.values == b_r assert_numpy_array_equal(result, expected.values) - with tm.assert_raises_regex(ValueError, msg2d): + with pytest.raises(ValueError, match=msg2d): df == b_c assert df.values.shape != b_c.shape @@ -601,10 +599,10 @@ def test_boolean_comparison(self): expected.index = df.index expected.columns = df.columns - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): result = df == lst - with tm.assert_raises_regex(ValueError, msg1d): + with pytest.raises(ValueError, match=msg1d): result = df == tup def test_combine_generic(self): @@ -774,10 +772,10 @@ def test_alignment_non_pandas(self): msg = 'Unable to coerce to Series, length must be 3: given 2' for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): align(df, val, 'index') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): align(df, val, 'columns') val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -791,10 +789,10 @@ def test_alignment_non_pandas(self): # shape mismatch msg = 'Unable to coerce to DataFrame, shape must be' val = np.array([[1, 2, 3], [4, 5, 6]]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): align(df, val, 'index') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): align(df, 
val, 'columns') val = np.zeros((3, 3, 3)) diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index d52b848bebad1..2a8add1a5de92 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -1,3 +1,4 @@ +import pytest import numpy as np from numpy.random import randn from datetime import timedelta @@ -111,8 +112,8 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_index_equal(result.columns, exp_index) # invalid axis - tm.assert_raises_regex( - ValueError, 'axis', df.to_timestamp, axis=2) + with pytest.raises(ValueError, match='axis'): + df.to_timestamp(axis=2) result1 = df.to_timestamp('5t', axis=1) result2 = df.to_timestamp('t', axis=1) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 3dbac79fed02b..a7c91dd36b2d2 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -220,7 +220,7 @@ def test_quantile_datetime(self): def test_quantile_invalid(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.tsframe.quantile(invalid) def test_quantile_box(self): diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3c6f0f0b2ab94..9ab7b04725978 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -141,10 +141,10 @@ def test_query_non_str(self): df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'b']}) msg = "expr must be a string to be evaluated" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.query(lambda x: x.B == "b") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.query(111) def test_query_empty_string(self): @@ -152,7 +152,7 @@ def test_query_empty_string(self): df = pd.DataFrame({'A': [1, 2, 
3]}) msg = "expr cannot be an empty string" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.query('') def test_eval_resolvers_as_list(self): @@ -524,8 +524,8 @@ def test_query_builtin(self): df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list('abc')) df.index.name = 'sin' - with tm.assert_raises_regex(NumExprClobberingError, - 'Variables in expression.+'): + msg = 'Variables in expression.+' + with pytest.raises(NumExprClobberingError, match=msg): df.query('sin > 5', engine=engine, parser=parser) def test_query(self): @@ -657,9 +657,11 @@ def test_query_undefined_local(self): from pandas.core.computation.ops import UndefinedVariableError engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) + df = DataFrame(np.random.rand(10, 2), columns=list('ab')) - with tm.assert_raises_regex(UndefinedVariableError, - "local variable 'c' is not defined"): + msg = "local variable 'c' is not defined" + + with pytest.raises(UndefinedVariableError, match=msg): df.query('a == @c', engine=engine, parser=parser) def test_index_resolvers_come_after_columns_with_the_same_name(self): @@ -1037,7 +1039,7 @@ def test_bool_arith_expr(self, parser, engine): @pytest.mark.parametrize('op', ['+', '-', '*', '/']) def test_invalid_type_for_operator_raises(self, parser, engine, op): df = DataFrame({'a': [1, 2], 'b': ['c', 'd']}) - with tm.assert_raises_regex(TypeError, - r"unsupported operand type\(s\) " - "for .+: '.+' and '.+'"): + msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'" + + with pytest.raises(TypeError, match=msg): df.eval('a {0} b'.format(op), engine=engine, parser=parser) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index 3134686c2a2d9..078c48539de16 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -194,11 +194,11 @@ def test_rank_na_option(self): # bad values throw error msg = "na_option must be one of 'keep', 'top', or 
'bottom'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.frame.rank(na_option='bad', ascending=False) # invalid type - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.frame.rank(na_option=True, ascending=False) def test_rank_axis(self): diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index bf755b1dac4b8..bfb358a3e8c45 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -17,9 +17,6 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal) - -import pandas.util.testing as tm - from pandas.tests.frame.common import TestData @@ -612,9 +609,9 @@ def test_replace_with_empty_list(self): assert_frame_equal(result, expected) # GH 19266 - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): df.replace({np.nan: []}) - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): df.replace({np.nan: ['dummy', 'alt']}) def test_replace_series_dict(self): @@ -923,7 +920,7 @@ def test_replace_bool_with_bool(self): def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) - with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): + with pytest.raises(TypeError, match='Cannot compare types .+'): df.replace({'asdf': 'asdb', True: 'yes'}) def test_replace_truthy(self): @@ -934,8 +931,7 @@ def test_replace_truthy(self): def test_replace_int_to_int_chain(self): df = DataFrame({'a': lrange(1, 5)}) - with tm.assert_raises_regex(ValueError, - "Replacement not allowed .+"): + with pytest.raises(ValueError, match="Replacement not allowed .+"): df.replace({'a': dict(zip(range(1, 5), range(2, 6)))}) def test_replace_str_to_str_chain(self): @@ -943,8 +939,7 @@ def test_replace_str_to_str_chain(self): 
astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({'a': astr}) - with tm.assert_raises_regex(ValueError, - "Replacement not allowed .+"): + with pytest.raises(ValueError, match="Replacement not allowed .+"): df.replace({'a': dict(zip(astr, bstr))}) def test_replace_swapping_bug(self): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index ab3d6ca3b19f7..a53b01466c7a4 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -66,7 +66,7 @@ def test_pivot_duplicates(self): data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'one', 'two'], 'c': [1., 2., 3., 3., 4.]}) - with tm.assert_raises_regex(ValueError, 'duplicate entries'): + with pytest.raises(ValueError, match='duplicate entries'): data.pivot('a', 'b', 'c') def test_pivot_empty(self): @@ -317,7 +317,7 @@ def test_unstack_fill_frame_categorical(self): # Fill with non-category results in a TypeError msg = r"'fill_value' \('d'\) is not in" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): data.unstack(fill_value='d') # Fill with category value replaces missing values as expected diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index b99e8983b5ba1..dd70d3df7d1b9 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -79,7 +79,7 @@ def test_sort_values(self): assert_frame_equal(sorted_df, expected) msg = r'Length of ascending \(5\) != length of by \(2\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) def test_sort_values_inplace(self): @@ -269,7 +269,7 @@ def test_sort_datetimes(self): def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] - with tm.assert_raises_regex(ValueError, "This Series is a view"): + with pytest.raises(ValueError, match="This 
Series is a view"): s.sort_values(inplace=True) cp = s.copy() @@ -447,26 +447,26 @@ def test_sort_index_duplicates(self): df = DataFrame([lrange(5, 9), lrange(4)], columns=['a', 'a', 'b', 'b']) - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): df.sort_values(by='a') - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by=['a']) - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): df.sort_values(by=['a']) - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): # multi-column 'by' is separate codepath df.sort_index(by=['a', 'b']) - with tm.assert_raises_regex(ValueError, 'not unique'): + with pytest.raises(ValueError, match='not unique'): # multi-column 'by' is separate codepath df.sort_values(by=['a', 'b']) @@ -474,11 +474,11 @@ def test_sort_index_duplicates(self): # GH4370 df = DataFrame(np.random.randn(4, 2), columns=MultiIndex.from_tuples([('a', 0), ('a', 1)])) - with tm.assert_raises_regex(ValueError, 'level'): + with pytest.raises(ValueError, match='level'): # use .sort_values #9816 with tm.assert_produces_warning(FutureWarning): df.sort_index(by='a') - with tm.assert_raises_regex(ValueError, 'level'): + with pytest.raises(ValueError, match='level'): df.sort_values(by='a') # convert tuples to a list of tuples diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index f6980a8585436..b27f60d437f57 100644 --- 
a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -156,7 +156,7 @@ class A(DataFrame): @property def bar(self): return self.i_dont_exist - with tm.assert_raises_regex(AttributeError, '.*i_dont_exist.*'): + with pytest.raises(AttributeError, match='.*i_dont_exist.*'): A().bar def test_subclass_align(self): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index eecbdc0130f02..5794630e72419 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -18,8 +18,7 @@ from pandas.util.testing import (assert_series_equal, assert_frame_equal, - assert_index_equal, - assert_raises_regex) + assert_index_equal) import pandas.util.testing as tm from pandas.compat import product @@ -276,9 +275,9 @@ def test_shift(self): assert_frame_equal(shifted2, shifted3) assert_frame_equal(ps, shifted2.shift(-1, 'B')) - tm.assert_raises_regex(ValueError, - 'does not match PeriodIndex freq', - ps.shift, freq='D') + msg = 'does not match PeriodIndex freq' + with pytest.raises(ValueError, match=msg): + ps.shift(freq='D') # shift other axis # GH 6371 @@ -360,8 +359,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=offsets.BDay()) assert_frame_equal(shifted, shifted3) - tm.assert_raises_regex( - ValueError, 'does not match', ps.tshift, freq='M') + with pytest.raises(ValueError, match='does not match'): + ps.tshift(freq='M') # DatetimeIndex shifted = self.tsframe.tshift(1) @@ -437,16 +436,16 @@ def test_truncate_nonsortedindex(self): df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e']}, index=[5, 3, 2, 9, 0]) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + with pytest.raises(ValueError, match=msg): df.truncate(before=3, after=9) rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') ts = pd.DataFrame({'A': np.random.randn(len(rng)), 'B': np.random.randn(len(rng))}, index=rng) - with 
tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + with pytest.raises(ValueError, match=msg): ts.sort_values('A', ascending=False).truncate(before='2011-11', after='2011-12') @@ -455,8 +454,8 @@ def test_truncate_nonsortedindex(self): 2: np.random.randn(5), 0: np.random.randn(5)}, columns=[3, 20, 2, 0]) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + with pytest.raises(ValueError, match=msg): df.truncate(before=2, after=20, axis=1) def test_asfreq(self): @@ -822,17 +821,17 @@ def test_tz_convert_and_localize(self, fn): # Bad Inputs # Not DatetimeIndex / PeriodIndex - with assert_raises_regex(TypeError, 'DatetimeIndex'): + with pytest.raises(TypeError, match='DatetimeIndex'): df = DataFrame(index=int_idx) df = getattr(df, fn)('US/Pacific') # Not DatetimeIndex / PeriodIndex - with assert_raises_regex(TypeError, 'DatetimeIndex'): + with pytest.raises(TypeError, match='DatetimeIndex'): df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) df = getattr(df, fn)('US/Pacific', level=0) # Invalid level - with assert_raises_regex(ValueError, 'not valid'): + with pytest.raises(ValueError, match='not valid'): df = DataFrame(index=l0) df = getattr(df, fn)('US/Pacific', level=1) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index aa91b7510a2b5..b56375d0a8670 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -621,12 +621,12 @@ def _make_frame(names=None): for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): read_csv(path, header=lrange(i), index_col=0) # write with cols - with tm.assert_raises_regex(TypeError, 'cannot specify cols ' - 'with a MultiIndex'): + msg = 'cannot specify cols with a MultiIndex' + with pytest.raises(TypeError, 
match=msg): df.to_csv(path, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: @@ -1124,11 +1124,11 @@ def test_to_csv_quoting(self): assert result == expected msg = "need to escape, but no escapechar set" - tm.assert_raises_regex(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE) - tm.assert_raises_regex(csv.Error, msg, df.to_csv, - quoting=csv.QUOTE_NONE, - escapechar=None) + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE) + + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE, escapechar=None) expected_rows = [',c_bool,c_float,c_int,c_string', '0,True,1.0,42.0,a', diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index 2de0e866f6e70..c609712b471e7 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -1,7 +1,6 @@ from pandas.core.frame import DataFrame import pytest -import pandas.util.testing as tm @pytest.fixture @@ -29,5 +28,5 @@ def test_validate_bool_args(self, dataframe, func, inplace): elif func == "sort_values": kwargs["by"] = ["a"] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 46bb6303d8908..753e6161d8052 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -15,8 +15,7 @@ import pandas.io.formats.printing as printing from pandas.compat import range, zip, PY3 -from pandas.util.testing import (assert_raises_regex, - assert_series_equal, +from pandas.util.testing import (assert_series_equal, assert_panel_equal, assert_frame_equal) @@ -476,16 +475,16 @@ def test_unexpected_keyword(self): # GH8597 ts = df['joe'].copy() ts[2] = np.nan - with assert_raises_regex(TypeError, 'unexpected keyword'): + with pytest.raises(TypeError, match='unexpected keyword'): df.drop('joe', axis=1, 
in_place=True) - with assert_raises_regex(TypeError, 'unexpected keyword'): + with pytest.raises(TypeError, match='unexpected keyword'): df.reindex([1, 0], inplace=True) - with assert_raises_regex(TypeError, 'unexpected keyword'): + with pytest.raises(TypeError, match='unexpected keyword'): ca.fillna(0, inplace=True) - with assert_raises_regex(TypeError, 'unexpected keyword'): + with pytest.raises(TypeError, match='unexpected keyword'): ts.fillna(0, in_place=True) # See gh-12301 @@ -494,13 +493,13 @@ def test_stat_unexpected_keyword(self): starwars = 'Star Wars' errmsg = 'unexpected keyword' - with assert_raises_regex(TypeError, errmsg): + with pytest.raises(TypeError, match=errmsg): obj.max(epic=starwars) # stat_function - with assert_raises_regex(TypeError, errmsg): + with pytest.raises(TypeError, match=errmsg): obj.var(epic=starwars) # stat_function_ddof - with assert_raises_regex(TypeError, errmsg): + with pytest.raises(TypeError, match=errmsg): obj.sum(epic=starwars) # cum_function - with assert_raises_regex(TypeError, errmsg): + with pytest.raises(TypeError, match=errmsg): obj.any(epic=starwars) # logical_function def test_api_compat(self): @@ -520,13 +519,13 @@ def test_stat_non_defaults_args(self): out = np.array([0]) errmsg = "the 'out' parameter is not supported" - with assert_raises_regex(ValueError, errmsg): + with pytest.raises(ValueError, match=errmsg): obj.max(out=out) # stat_function - with assert_raises_regex(ValueError, errmsg): + with pytest.raises(ValueError, match=errmsg): obj.var(out=out) # stat_function_ddof - with assert_raises_regex(ValueError, errmsg): + with pytest.raises(ValueError, match=errmsg): obj.sum(out=out) # cum_function - with assert_raises_regex(ValueError, errmsg): + with pytest.raises(ValueError, match=errmsg): obj.any(out=out) # logical_function def test_truncate_out_of_bounds(self): @@ -807,23 +806,23 @@ def test_transpose(self): for p in [tm.makePanel()]: tm.assert_panel_equal(p.transpose(2, 0, 1) .transpose(1, 2, 0), p) - 
tm.assert_raises_regex(TypeError, msg, p.transpose, - 2, 0, 1, axes=(2, 0, 1)) + with pytest.raises(TypeError, match=msg): + p.transpose(2, 0, 1, axes=(2, 0, 1)) def test_numpy_transpose(self): msg = "the 'axes' parameter is not supported" s = tm.makeFloatSeries() - tm.assert_series_equal( - np.transpose(s), s) - tm.assert_raises_regex(ValueError, msg, - np.transpose, s, axes=1) + tm.assert_series_equal(np.transpose(s), s) + + with pytest.raises(ValueError, match=msg): + np.transpose(s, axes=1) df = tm.makeTimeDataFrame() - tm.assert_frame_equal(np.transpose( - np.transpose(df)), df) - tm.assert_raises_regex(ValueError, msg, - np.transpose, df, axes=1) + tm.assert_frame_equal(np.transpose(np.transpose(df)), df) + + with pytest.raises(ValueError, match=msg): + np.transpose(df, axes=1) with catch_warnings(record=True): simplefilter("ignore", FutureWarning) @@ -869,16 +868,16 @@ def test_take_invalid_kwargs(self): for obj in (s, df, p): msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, obj.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + obj.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, obj.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + obj.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, obj.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + obj.take(indices, mode='clip') def test_equals(self): s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) @@ -1018,7 +1017,7 @@ def test_pipe_panel(self): assert_panel_equal(result, expected) with pytest.raises(ValueError): - result = wp.pipe((f, 'y'), x=1, y=1) + wp.pipe((f, 'y'), x=1, y=1) @pytest.mark.parametrize('box', [pd.Series, pd.DataFrame]) def test_axis_classmethods(self, box): diff --git a/pandas/tests/generic/test_label_or_level_utils.py 
b/pandas/tests/generic/test_label_or_level_utils.py index 4d78270c856ae..5cb5e935752a7 100644 --- a/pandas/tests/generic/test_label_or_level_utils.py +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -128,7 +128,7 @@ def test_is_level_reference_series_axis1_error(df): # Make series with L1 as index s = df.set_index('L1').L2 - with tm.assert_raises_regex(ValueError, "No axis named 1"): + with pytest.raises(ValueError, match="No axis named 1"): s._is_level_reference('L1', axis=1) @@ -138,7 +138,7 @@ def test_is_level_reference_panel_error(panel): msg = ("_is_level_reference is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._is_level_reference('L1', axis=0) @@ -146,7 +146,7 @@ def test_is_label_reference_panel_error(panel): msg = ("_is_label_reference is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._is_label_reference('L1', axis=0) @@ -154,7 +154,7 @@ def test_is_label_or_level_reference_panel_error(panel): msg = ("_is_label_or_level_reference is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._is_label_or_level_reference('L1', axis=0) @@ -176,7 +176,7 @@ def test_check_label_or_level_ambiguity_df(df_ambig, axis): # df_ambig has both an on-axis level and off-axis label named L1 # Therefore, L1 is ambiguous. - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df_ambig._check_label_or_level_ambiguity("L1", axis=axis) # df_ambig has an on-axis level named L2,, and it is not ambiguous. 
@@ -209,7 +209,7 @@ def test_check_label_or_level_ambiguity_series_axis1_error(df): # Make series with L1 as index s = df.set_index('L1').L2 - with tm.assert_raises_regex(ValueError, "No axis named 1"): + with pytest.raises(ValueError, match="No axis named 1"): s._check_label_or_level_ambiguity('L1', axis=1) @@ -219,7 +219,7 @@ def test_check_label_or_level_ambiguity_panel_error(panel): msg = ("_check_label_or_level_ambiguity is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._check_label_or_level_ambiguity("L1", axis=0) @@ -294,7 +294,7 @@ def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): else: expected_msg = "The index label 'L2' is not unique" - with tm.assert_raises_regex(ValueError, expected_msg): + with pytest.raises(ValueError, match=expected_msg): assert_label_values(df_duplabels, ['L2'], axis=axis) @@ -316,7 +316,7 @@ def test_get_label_or_level_values_series_axis1_error(df): # Make series with L1 as index s = df.set_index('L1').L2 - with tm.assert_raises_regex(ValueError, "No axis named 1"): + with pytest.raises(ValueError, match="No axis named 1"): s._get_label_or_level_values('L1', axis=1) @@ -326,7 +326,7 @@ def test_get_label_or_level_values_panel_error(panel): msg = ("_get_label_or_level_values is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._get_label_or_level_values('L1', axis=0) @@ -371,7 +371,7 @@ def test_drop_labels_or_levels_df(df_levels, axis): assert_labels_dropped(df_levels, expected_labels, axis=axis) assert_levels_dropped(df_levels, expected_levels, axis=axis) - with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + with pytest.raises(ValueError, match="not valid labels or levels"): df_levels._drop_labels_or_levels('L4', axis=axis) @@ -383,14 +383,14 @@ 
def test_drop_labels_or_levels_series(df): s = df.set_index('L1').L2 assert_levels_dropped(s, ['L1'], axis=0) - with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + with pytest.raises(ValueError, match="not valid labels or levels"): s._drop_labels_or_levels('L4', axis=0) # Make series with L1 and L2 as index s = df.set_index(['L1', 'L2']).L3 assert_levels_dropped(s, ['L1', 'L2'], axis=0) - with tm.assert_raises_regex(ValueError, "not valid labels or levels"): + with pytest.raises(ValueError, match="not valid labels or levels"): s._drop_labels_or_levels('L4', axis=0) @@ -400,5 +400,5 @@ def test_drop_labels_or_levels_panel_error(panel): msg = ("_drop_labels_or_levels is not implemented for {type}" .format(type=type(panel))) - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): panel._drop_labels_or_levels('L1', axis=0) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index b0d6a0e83440a..52bfee66f94f8 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -27,9 +27,9 @@ def test_agg_must_agg(df): grouped = df.groupby('A')['C'] msg = "Must produce aggregated value" - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): grouped.agg(lambda x: x.describe()) - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): grouped.agg(lambda x: x.index[:2]) @@ -217,7 +217,7 @@ def test_agg_multiple_functions_too_many_lambdas(df): funcs = ['mean', lambda x: x.mean(), lambda x: x.std()] msg = 'Function names must be unique, found multiple named ' - with tm.assert_raises_regex(SpecificationError, msg): + with pytest.raises(SpecificationError, match=msg): grouped.agg(funcs) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index d8a545b323674..d0e1f04238366 
100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -82,12 +82,12 @@ def test_cython_agg_nothing_to_agg(): 'b': ['foo', 'bar'] * 25}) msg = "No numeric types to aggregate" - with tm.assert_raises_regex(DataError, msg): + with pytest.raises(DataError, match=msg): frame.groupby('a')['b'].mean() frame = DataFrame({'a': np.random.randint(0, 5, 50), 'b': ['foo', 'bar'] * 25}) - with tm.assert_raises_regex(DataError, msg): + with pytest.raises(DataError, match=msg): frame[['b']].groupby(frame['a']).mean() @@ -96,7 +96,7 @@ def test_cython_agg_nothing_to_agg_with_dates(): 'b': ['foo', 'bar'] * 25, 'dates': pd.date_range('now', periods=50, freq='T')}) msg = "No numeric types to aggregate" - with tm.assert_raises_regex(DataError, msg): + with pytest.raises(DataError, match=msg): frame.groupby('b').dates.mean() diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index c35405ad739c9..fca863b4d8eb0 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -268,7 +268,7 @@ def test_agg_nested_dicts(): g = df.groupby(['A', 'B']) msg = r'cannot perform renaming for r[1-2] with a nested dictionary' - with tm.assert_raises_regex(SpecificationError, msg): + with pytest.raises(SpecificationError, match=msg): g.aggregate({'r1': {'C': ['mean', 'sum']}, 'r2': {'D': ['mean', 'sum']}}) @@ -302,7 +302,7 @@ def raiseException(df): pprint_thing(df.to_string()) raise TypeError('test') - with tm.assert_raises_regex(TypeError, 'test'): + with pytest.raises(TypeError, match='test'): df.groupby(0).agg(raiseException) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 873d9f6076b69..205b06c5b679f 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -542,8 +542,7 @@ def test_filter_enforces_scalarness(): ['worst', 'd', 'y'], ['best', 'd', 'z'], ], 
columns=['a', 'b', 'c']) - with tm.assert_raises_regex(TypeError, - 'filter function returned a.*'): + with pytest.raises(TypeError, match='filter function returned a.*'): df.groupby('c').filter(lambda g: g['a'] == 'best') @@ -557,8 +556,7 @@ def test_filter_non_bool_raises(): ['worst', 'd', 1], ['best', 'd', 1], ], columns=['a', 'b', 'c']) - with tm.assert_raises_regex(TypeError, - 'filter function returned a.*'): + with pytest.raises(TypeError, match='filter function returned a.*'): df.groupby('a').filter(lambda g: g.c.mean()) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 775747ce0c6c1..646445623778b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -517,18 +517,20 @@ def test_nsmallest(): tm.assert_series_equal(gb.nsmallest(3, keep='last'), e) -def test_numpy_compat(): +@pytest.mark.parametrize("func", [ + 'mean', 'var', 'std', 'cumprod', 'cumsum' +]) +def test_numpy_compat(func): # see gh-12811 df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]}) g = df.groupby('A') msg = "numpy operations are not valid with groupby" - for func in ('mean', 'var', 'std', 'cumprod', 'cumsum'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(g, func), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(g, func), foo=1) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(foo=1) def test_cummin_cummax(): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3cdd0965ccfd0..e92e5a70b263f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -629,7 +629,7 @@ def test_as_index_series_column_slice_raises(df): grouped = df.groupby('A', as_index=False) msg = r"Column\(s\) C already selected" - with tm.assert_raises_regex(IndexError, msg): + with 
pytest.raises(IndexError, match=msg): grouped['C'].__getitem__('D') @@ -1679,7 +1679,7 @@ def test_tuple_correct_keyerror(): df = pd.DataFrame(1, index=range(3), columns=pd.MultiIndex.from_product([[1, 2], [3, 4]])) - with tm.assert_raises_regex(KeyError, "(7, 8)"): + with pytest.raises(KeyError, match="(7, 8)"): df.groupby((7, 8)).mean() diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index e7c0881b11871..546a37bf3d56a 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -21,7 +21,7 @@ # selection # -------------------------------- -class TestSelection(): +class TestSelection(object): def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) @@ -29,7 +29,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with tm.assert_raises_regex(KeyError, '^[^A]+$'): + with pytest.raises(KeyError, match='^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! 
g[['A', 'C']] @@ -506,18 +506,14 @@ def test_groupby_args(self, mframe): # PR8618 and issue 8015 frame = mframe - def j(): + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): frame.groupby() - tm.assert_raises_regex(TypeError, "You have to supply one of " - "'by' and 'level'", j) - - def k(): + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): frame.groupby(by=None, level=None) - tm.assert_raises_regex(TypeError, "You have to supply one of " - "'by' and 'level'", k) - @pytest.mark.parametrize('sort,labels', [ [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]] diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index f337af4d39e54..e7e91572c56d1 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -249,7 +249,7 @@ def test_rank_object_raises(ties_method, ascending, na_option, pct, vals): df = DataFrame({'key': ['foo'] * 5, 'val': vals}) - with tm.assert_raises_regex(TypeError, "not callable"): + with pytest.raises(TypeError, match="not callable"): df.groupby('key').rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) @@ -269,7 +269,7 @@ def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): df = DataFrame({'key': ['foo'] * 5, 'val': vals}) msg = "na_option must be one of 'keep', 'top', or 'bottom'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.groupby('key').rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4cf63a321a47a..dbbf6e583796f 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -658,11 +658,11 @@ def test_transform_with_non_scalar_group(): df = pd.DataFrame(np.random.randint(1, 10, (4, 12)), columns=cols, 
index=['A', 'C', 'G', 'T']) - tm.assert_raises_regex(ValueError, 'transform must return ' - 'a scalar value for each ' - 'group.*', - df.groupby(axis=1, level=1).transform, - lambda z: z.div(z.sum(axis=1), axis=0)) + + msg = 'transform must return a scalar value for each group.*' + with pytest.raises(ValueError, match=msg): + df.groupby(axis=1, level=1).transform( + lambda z: z.div(z.sum(axis=1), axis=0)) @pytest.mark.parametrize('cols,exp,comp_func', [ diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index ae033f7b3f251..d5096ee99c8b0 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -263,7 +263,7 @@ def test_groupby_blacklist(df_letters): for obj in (df, s): gb = obj.groupby(df.letters) msg = fmt.format(bl, type(gb).__name__) - with tm.assert_raises_regex(AttributeError, msg): + with pytest.raises(AttributeError, match=msg): getattr(gb, bl) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c5cbaea23df76..4b0daac34c2e3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -125,39 +125,40 @@ def test_create_index_existing_name(self): def test_numeric_compat(self): idx = self.create_index() - tm.assert_raises_regex(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assert_raises_regex(TypeError, "cannot perform __rmul__", - lambda: 1 * idx) - - div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = ("cannot perform __truediv__" if PY3 + else "cannot perform __div__") + with pytest.raises(TypeError, match=div_err): + idx / 1 + div_err = div_err.replace(' __', ' __r') - tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) - 
tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", - lambda: 1 // idx) + with pytest.raises(TypeError, match=div_err): + 1 / idx + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx def test_logical_compat(self): idx = self.create_index() - tm.assert_raises_regex(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assert_raises_regex(TypeError, 'cannot perform any', - lambda: idx.any()) + with pytest.raises(TypeError, match='cannot perform all'): + idx.all() + with pytest.raises(TypeError, match='cannot perform any'): + idx.any() def test_boolean_context_compat(self): # boolean context compat idx = self.create_index() - def f(): + with pytest.raises(ValueError, match='The truth value of a'): if idx: pass - tm.assert_raises_regex(ValueError, 'The truth value of a', f) - def test_reindex_base(self): idx = self.create_index() expected = np.arange(idx.size, dtype=np.intp) @@ -165,7 +166,7 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): + with pytest.raises(ValueError, match='Invalid fill method'): idx.get_indexer(idx, method='invalid') def test_get_indexer_consistency(self): @@ -180,8 +181,8 @@ def test_get_indexer_consistency(self): assert indexer.dtype == np.intp else: e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = index.get_indexer(index[0:2]) + with pytest.raises(InvalidIndexError, match=e): + index.get_indexer(index[0:2]) indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) @@ -227,9 +228,8 @@ def test_repr_max_seq_item_setting(self): assert '...' 
not in str(idx) def test_wrong_number_names(self, indices): - def testit(ind): - ind.names = ["apple", "banana", "carrot"] - tm.assert_raises_regex(ValueError, "^Length", testit, indices) + with pytest.raises(ValueError, match="^Length"): + indices.names = ["apple", "banana", "carrot"] def test_set_name_methods(self, indices): new_name = "This is the new name for this index" @@ -247,10 +247,10 @@ def test_set_name_methods(self, indices): assert res is None assert indices.name == new_name assert indices.names == [new_name] - # with tm.assert_raises_regex(TypeError, "list-like"): + # with pytest.raises(TypeError, match="list-like"): # # should still fail even if it would be the right length # ind.set_names("a") - with tm.assert_raises_regex(ValueError, "Level must be None"): + with pytest.raises(ValueError, match="Level must be None"): indices.set_names("a", level=0) # rename in place just leaves tuples and other containers alone @@ -261,8 +261,9 @@ def test_set_name_methods(self, indices): def test_hash_error(self, indices): index = indices - tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__, hash, indices) + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): + hash(indices) def test_copy_name(self): # gh-12309: Check that the "name" argument @@ -511,16 +512,16 @@ def test_numpy_argsort(self): # backwards compatibility concerns if isinstance(type(ind), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, - np.argsort, ind, axis=1) + with pytest.raises(ValueError, match=msg): + np.argsort(ind, axis=1) msg = "the 'kind' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, kind='mergesort') + with pytest.raises(ValueError, match=msg): + np.argsort(ind, kind='mergesort') msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - ind, order=('a', 'b')) + with 
pytest.raises(ValueError, match=msg): + np.argsort(ind, order=('a', 'b')) def test_pickle(self, indices): self.verify_pickle(indices) @@ -551,16 +552,16 @@ def test_take_invalid_kwargs(self): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') def test_repeat(self): rep = 2 @@ -580,8 +581,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(i, rep), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.repeat, - i, rep, axis=0) + with pytest.raises(ValueError, match=msg): + np.repeat(i, rep, axis=0) @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) def test_where(self, klass): @@ -597,19 +598,16 @@ def test_where(self, klass): result = i.where(klass(cond)) tm.assert_index_equal(result, expected) - def test_setops_errorcases(self): + @pytest.mark.parametrize("case", [0.5, "xxx"]) + @pytest.mark.parametrize("method", ["intersection", "union", + "difference", "symmetric_difference"]) + def test_set_ops_error_cases(self, case, method): for name, idx in compat.iteritems(self.indices): - # # non-iterable input - cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, - idx.symmetric_difference] - - for method in methods: - for case in cases: - tm.assert_raises_regex(TypeError, - "Input must be Index " - "or array-like", - method, case) + # non-iterable input + + msg = "Input must be Index or array-like" + with 
pytest.raises(TypeError, match=msg): + getattr(idx, method)(case) def test_intersection_base(self): for name, idx in compat.iteritems(self.indices): @@ -628,8 +626,8 @@ def test_intersection_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.intersection(case) + with pytest.raises(ValueError, match=msg): + first.intersection(case) elif isinstance(idx, CategoricalIndex): pass else: @@ -638,8 +636,8 @@ def test_intersection_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.intersection([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3]) def test_union_base(self): for name, idx in compat.iteritems(self.indices): @@ -655,8 +653,8 @@ def test_union_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.union(case) + with pytest.raises(ValueError, match=msg): + first.union(case) elif isinstance(idx, CategoricalIndex): pass else: @@ -665,8 +663,8 @@ def test_union_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.union([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3]) def test_difference_base(self): for name, idx in compat.iteritems(self.indices): @@ -686,8 +684,8 @@ def test_difference_base(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.difference(case) + with pytest.raises(ValueError, match=msg): + first.difference(case) elif isinstance(idx, CategoricalIndex): pass elif isinstance(idx, 
(DatetimeIndex, TimedeltaIndex)): @@ -700,8 +698,8 @@ def test_difference_base(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.difference([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3]) def test_symmetric_difference(self): for name, idx in compat.iteritems(self.indices): @@ -720,8 +718,8 @@ def test_symmetric_difference(self): for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.symmetric_difference(case) + with pytest.raises(ValueError, match=msg): + first.symmetric_difference(case) elif isinstance(idx, CategoricalIndex): pass else: @@ -730,7 +728,7 @@ def test_symmetric_difference(self): if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): first.symmetric_difference([1, 2, 3]) def test_insert_base(self): @@ -767,7 +765,7 @@ def test_delete_base(self): with pytest.raises((IndexError, ValueError)): # either depending on numpy version - result = idx.delete(len(idx)) + idx.delete(len(idx)) def test_equals(self): @@ -799,7 +797,7 @@ def test_equals_op(self): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -811,7 +809,7 @@ def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_b 
tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -821,23 +819,23 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): series_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding @@ -947,7 +945,7 @@ def test_fillna(self): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.fillna(idx[0]) else: idx = index.copy() @@ -956,7 +954,7 @@ def test_fillna(self): assert result is not idx msg = "'value' must be a scalar, passed: " - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.fillna([idx[0]]) idx = index.copy() @@ -990,7 +988,7 @@ def test_nulls(self): elif isinstance(index, 
MultiIndex): idx = index.copy() msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.isna() else: diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index a9cfc551e073b..4b8ead71ed74c 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -169,7 +169,7 @@ def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) msg = 'Cannot cast DatetimeIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.astype(dtype) def test_index_convert_to_datetime_array(self): diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 7a251a8ecfb28..04b2c4f280588 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -253,8 +253,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - with tm.assert_raises_regex(TypeError, - 'data is already tz-aware'): + with pytest.raises(TypeError, match='data is already tz-aware'): DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') @@ -264,8 +263,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') - with tm.assert_raises_regex(TypeError, - 'data is already tz-aware'): + with pytest.raises(TypeError, match='data is already tz-aware'): # passing tz should results in DatetimeIndex, then mismatch raises # TypeError Index([pd.NaT, Timestamp('2011-01-01 10:00'), @@ -314,7 +312,7 @@ def test_constructor_coverage(self): tm.assert_index_equal(rng, exp) msg = 'periods must be a 
number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): DatetimeIndex(start='1/1/2000', periods='foo', freq='D') pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b6bab272c8c0a..06b52dfc407cf 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -167,7 +167,7 @@ def test_date_range_ambiguous_arguments(self): msg = ('Of the four parameters: start, end, periods, and ' 'freq, exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(start, end, periods=10, freq='s') def test_date_range_convenience_periods(self): @@ -245,25 +245,25 @@ def test_range_misspecified(self): msg = ('Of the four parameters: start, end, periods, and ' 'freq, exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(start='1/1/2000') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(end='1/1/2000') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(periods=10) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(start='1/1/2000', freq='H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(end='1/1/2000', freq='H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range(periods=10, freq='H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range() @pytest.mark.parametrize('f', [compat.long, int]) @@ -311,7 +311,7 @@ def test_construct_with_different_start_end_string_format(self): def 
test_error_with_zero_monthends(self): msg = r'Offset <0 \* MonthEnds> did not increment date' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0)) def test_range_bug(self): @@ -515,7 +515,7 @@ def test_timezone_comparaison_bug(self): def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') msg = 'Inferred time zone not equal to passed time zone' - with tm.assert_raises_regex(AssertionError, msg): + with pytest.raises(AssertionError, match=msg): date_range(start, periods=2, tz='Europe/Berlin') def test_negative_non_tick_frequency_descending_dates(self, @@ -613,14 +613,14 @@ def test_constructor(self): bdate_range(end=START, periods=20, freq=BDay()) msg = 'periods must be a number, got B' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): date_range('2011-1-1', '2012-1-1', 'B') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): bdate_range('2011-1-1', '2012-1-1', 'B') msg = 'freq must be specified for bdate_range; use date_range instead' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): bdate_range(START, END, periods=10, freq=None) def test_naive_aware_conflicts(self): @@ -628,10 +628,10 @@ def test_naive_aware_conflicts(self): aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") msg = 'tz-naive.*tz-aware' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): naive.join(aware) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): aware.join(naive) def test_misc(self): @@ -688,10 +688,10 @@ def test_constructor(self): bdate_range(end=START, periods=20, freq=CDay()) msg = 'periods must be a number, got C' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): date_range('2011-1-1', '2012-1-1', 'C') 
- with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): bdate_range('2011-1-1', '2012-1-1', 'C') def test_misc(self): @@ -726,7 +726,7 @@ def test_cdaterange_weekmask(self): # raise with non-custom freq msg = ('a custom frequency string is required when holidays or ' 'weekmask are passed, got frequency B') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): bdate_range('2013-05-01', periods=3, weekmask='Sun Mon Tue Wed Thu') @@ -739,7 +739,7 @@ def test_cdaterange_holidays(self): # raise with non-custom freq msg = ('a custom frequency string is required when holidays or ' 'weekmask are passed, got frequency B') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): bdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) def test_cdaterange_weekmask_and_holidays(self): @@ -752,7 +752,7 @@ def test_cdaterange_weekmask_and_holidays(self): # raise with non-custom freq msg = ('a custom frequency string is required when holidays or ' 'weekmask are passed, got frequency B') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): bdate_range('2013-05-01', periods=3, weekmask='Sun Mon Tue Wed Thu', holidays=['2013-05-01']) @@ -767,5 +767,5 @@ def test_all_custom_freq(self, freq): bad_freq = freq + 'FOO' msg = 'invalid custom frequency string: {freq}' - with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)): + with pytest.raises(ValueError, match=msg.format(freq=bad_freq)): bdate_range(START, END, freq=bad_freq) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index cea56bf803083..4363777d25235 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -95,8 +95,8 @@ def test_week_of_month_frequency(self): def test_hash_error(self): index = date_range('20010101', periods=10) - with 
tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__): + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): hash(index) def test_stringified_slice_with_tz(self): @@ -303,9 +303,8 @@ def test_join_with_period_index(self, join_type): c_idx_type='p', r_idx_type='dt') s = df.iloc[:5, 0] - with tm.assert_raises_regex(ValueError, - 'can only call with other ' - 'PeriodIndex-ed objects'): + msg = 'can only call with other PeriodIndex-ed objects' + with pytest.raises(ValueError, match=msg): df.columns.join(s.index, how=join_type) def test_factorize(self): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index b66475612fe40..f75b5867e1511 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -181,16 +181,16 @@ def test_take_invalid_kwargs(self): indices = [1, 6, 5, 9, 10, 13, 15, 3] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') # TODO: This method came from test_datetime; de-dup with version above @pytest.mark.parametrize('tz', [None, 'US/Eastern', 'Asia/Tokyo']) @@ -237,9 +237,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -268,9 +268,9 @@ def test_take_fill_value_with_timezone(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -411,8 +411,8 @@ def test_delete(self): assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) + # either depending on numpy version + idx.delete(5) for tz in [None, 'Asia/Tokyo', 'US/Pacific']: idx = date_range(start='2000-01-01 09:00', periods=10, freq='H', @@ -508,8 +508,7 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-01T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): + with pytest.raises(ValueError, match='unit abbreviation w/o a number'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') @@ -583,12 +582,11 @@ def test_get_indexer(self): with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - def test_reasonable_keyerror(self): + def test_reasonable_key_error(self): # GH#1062 index = DatetimeIndex(['1/3/2000']) - with pytest.raises(KeyError) as excinfo: + with pytest.raises(KeyError, match='2000'): index.get_loc('1/1/2000') - assert '2000' in str(excinfo.value) @pytest.mark.parametrize('key', [pd.Timedelta(0), pd.Timedelta(1), diff --git 
a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 2cb7482cda617..d599af6180bfb 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -84,17 +84,21 @@ def test_numpy_minmax(self): assert np.max(dr) == Timestamp('2016-01-20 00:00:00', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, dr, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, dr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.max(dr, out=0) assert np.argmin(dr) == 0 assert np.argmax(dr) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, dr, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, dr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.argmax(dr, out=0) def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture @@ -148,8 +152,8 @@ def test_repeat(self, tz_naive_fixture): assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - tm.assert_raises_regex(ValueError, msg, np.repeat, - rng, reps, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) def test_resolution(self, tz_naive_fixture): tz = tz_naive_fixture @@ -415,11 +419,11 @@ def test_freq_setter_errors(self): # setting with an incompatible freq msg = ('Inferred frequency 2D from passed values does not conform to ' 'passed frequency 5D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '5D' # setting with non-freq string - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): idx.freq = 'foo' def test_offset_deprecated(self): diff --git 
a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 27e53c15238be..e6e19c6a8200d 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -66,12 +66,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), date_range('2014-01-01', periods=20, freq='MS')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] def test_slice_bounds_empty(self): # GH 14354 @@ -222,8 +222,8 @@ def test_partial_slice_second_precision(self): tm.assert_series_equal(s['2005-1-1 00:01:00'], s.iloc[10:]) assert s[Timestamp('2005-1-1 00:00:59.999990')] == s.iloc[0] - tm.assert_raises_regex(KeyError, '2005-1-1 00:00:00', - lambda: s['2005-1-1 00:00:00']) + with pytest.raises(KeyError, match='2005-1-1 00:00:00'): + s['2005-1-1 00:00:00'] def test_partial_slicing_dataframe(self): # GH14856 @@ -349,14 +349,14 @@ def test_partial_slice_doesnt_require_monotonicity(self): timestamp = pd.Timestamp('2014-01-10') tm.assert_series_equal(nonmonotonic['2014-01-10':], expected) - tm.assert_raises_regex(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: nonmonotonic[timestamp:]) + with pytest.raises(KeyError, + match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic[timestamp:] tm.assert_series_equal(nonmonotonic.loc['2014-01-10':], expected) - tm.assert_raises_regex(KeyError, - r"Timestamp\('2014-01-10 00:00:00'\)", - lambda: 
nonmonotonic.loc[timestamp:]) + with pytest.raises(KeyError, + match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic.loc[timestamp:] def test_loc_datetime_length_one(self): # GH16071 diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index b644cb5844d9b..81f4c77009ce4 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -97,14 +97,16 @@ def test_round(self, tz_naive_fixture): assert elt.round(freq='H') == expected_elt msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): rng.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - tm.assert_raises_regex(ValueError, msg, rng.round, freq='M') - tm.assert_raises_regex(ValueError, msg, elt.round, freq='M') + with pytest.raises(ValueError, match=msg): + rng.round(freq='M') + with pytest.raises(ValueError, match=msg): + elt.round(freq='M') # GH#14440 & GH#15578 index = DatetimeIndex(['2016-10-17 12:00:00.0015'], tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 8c66b68c94946..c24c1025ea63c 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -586,7 +586,7 @@ def test_week_without_day_and_calendar_year(self, date, format): # GH16774 msg = "Cannot use '%W' or '%U' without day and year" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.to_datetime(date, format=format) def test_iso_8601_strings_with_same_offset(self): @@ -865,7 +865,7 @@ def test_dataframe(self, cache): msg = ("cannot assemble the datetimes: time data .+ does not " r"match format '%Y%m%d' \(match\)") - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), @@ -875,7 +875,7 @@ def test_dataframe(self, cache): # extra columns msg = ("extra keys have been passed to the datetime assemblage: " r"\[foo\]") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df2 = df.copy() df2['foo'] = 1 to_datetime(df2, cache=cache) @@ -888,7 +888,7 @@ def test_dataframe(self, cache): ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df[c], cache=cache) # duplicates @@ -897,7 +897,7 @@ def test_dataframe(self, cache): 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], @@ -905,7 +905,7 @@ def test_dataframe(self, cache): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @pytest.mark.parametrize('cache', [True, False]) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index 4389a22641b72..8bcd6ef5dcc5a 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -49,12 +49,12 @@ def test_astype_category(self, index): 'datetime64[ns, US/Eastern]']) def test_astype_cannot_cast(self, index, dtype): msg = 'Cannot cast IntervalIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) def test_astype_invalid_dtype(self, index): msg = "data type 'fake_dtype' not 
understood" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype('fake_dtype') @@ -127,7 +127,7 @@ def test_subtype_integer(self, subtype): # raises with NA msg = 'Cannot convert NA to integer' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.insert(0, np.nan).astype(dtype) @pytest.mark.xfail(reason='GH#15832', strict=True) @@ -152,7 +152,7 @@ def test_subtype_integer_errors(self): def test_subtype_datetimelike(self, index, subtype): dtype = IntervalDtype(subtype) msg = 'Cannot convert .* to .*; subtypes are incompatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) @@ -183,7 +183,7 @@ def test_subtype_integer(self, index, subtype): def test_subtype_float(self, index): dtype = IntervalDtype('float64') msg = 'Cannot convert .* to .*; subtypes are incompatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) def test_subtype_datetimelike(self): @@ -192,15 +192,15 @@ def test_subtype_datetimelike(self): msg = 'Cannot convert .* to .*; subtypes are incompatible' index = interval_range(Timestamp('2018-01-01'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) # timedelta -> datetime raises dtype = IntervalDtype('datetime64[ns]') index = interval_range(Timedelta('0 days'), periods=10) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index a937dbc40a843..d07c11012a86b 100644 --- 
a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -107,7 +107,7 @@ def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @pytest.mark.parametrize('cat_constructor', [ @@ -132,30 +132,30 @@ def test_generic_errors(self, constructor): # invalid closed msg = "invalid option for 'closed': invalid" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(closed='invalid', **filler) # unsupported dtype msg = 'dtype must be an IntervalDtype, got int64' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(dtype='int64', **filler) # invalid dtype msg = "data type 'invalid' not understood" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(dtype='invalid', **filler) # no point in nesting periods in an IntervalIndex periods = period_range('2000-01-01', periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = 'Period dtypes are not supported, use a PeriodIndex instead' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = 'left side of interval must be <= right side' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(**decreasing_kwargs) @@ -178,14 +178,14 @@ def test_constructor_errors(self): data = Categorical(list('01234abcde'), ordered=True) msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with 
pytest.raises(TypeError, match=msg): IntervalIndex.from_arrays(data[:-1], data[1:]) # unequal length left = [0, 1, 2] right = [2, 3] msg = 'left and right must have the same length' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays(left, right) @pytest.mark.parametrize('left_subtype, right_subtype', [ @@ -224,7 +224,7 @@ def test_constructor_errors(self): data = Categorical(list('01234abcde'), ordered=True) msg = ('category, object, and string subtypes are not supported ' 'for IntervalIndex') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): IntervalIndex.from_breaks(data) def test_length_one(self): @@ -261,17 +261,17 @@ def test_constructor_errors(self): # non-tuple tuples = [(0, 1), 2, (3, 4)] msg = 'IntervalIndex.from_tuples received an invalid item, 2' - with tm.assert_raises_regex(TypeError, msg.format(t=tuples)): + with pytest.raises(TypeError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) # too few/many items tuples = [(0, 1), (2,), (3, 4)] msg = 'IntervalIndex.from_tuples requires tuples of length 2, got {t}' - with tm.assert_raises_regex(ValueError, msg.format(t=tuples)): + with pytest.raises(ValueError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) tuples = [(0, 1), (2, 3, 4), (5, 6)] - with tm.assert_raises_regex(ValueError, msg.format(t=tuples)): + with pytest.raises(ValueError, match=msg.format(t=tuples)): IntervalIndex.from_tuples(tuples) def test_na_tuples(self): @@ -318,19 +318,19 @@ def test_constructor_errors(self, constructor): # mismatched closed within intervals with no constructor override ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')] msg = 'intervals must all be closed on the same side' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): constructor(ivs) # scalar msg = (r'IntervalIndex\(...\) must be called with a collection of ' 'some 
kind, 5 was passed') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor(5) # not an interval msg = ("type <(class|type) 'numpy.int64'> with value 0 " "is not an interval") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): constructor([0, 1]) @pytest.mark.parametrize('data, closed', [ diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 49d093d312cf1..d5f62429ddb73 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -214,13 +214,13 @@ def test_insert(self, data): # invalid type msg = 'can only insert Interval objects and NA into an IntervalIndex' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): data.insert(1, 'foo') # invalid closed msg = 'inserted item must be closed on the same side as the index' for closed in {'left', 'right', 'both', 'neither'} - {item.closed}: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) @@ -690,7 +690,7 @@ def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): msg = ('Cannot index an IntervalIndex of subtype {dtype1} with ' 'values of dtype {dtype2}') msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) # To be removed, replaced by test_interval_new.py (see #16316, #16386) @@ -842,7 +842,7 @@ def test_set_operation_errors(self, closed, op_name): # non-IntervalIndex msg = ('the other index needs to be an IntervalIndex too, but ' 'was type Int64Index') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): set_op(Index([1, 2, 3])) # mixed closed @@ -850,14 +850,14 @@ def 
test_set_operation_errors(self, closed, op_name): 'that are closed on the same side') for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: other = self.create_index(closed=other_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): set_op(other) # GH 19016: incompatible dtypes other = interval_range(Timestamp('20180101'), periods=9, closed=closed) msg = ('can only do {op} between two IntervalIndex objects that have ' 'compatible dtypes').format(op=op_name) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): set_op(other) def test_isin(self, closed): @@ -934,9 +934,9 @@ def test_comparison(self): actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) - with tm.assert_raises_regex(TypeError, 'unorderable types'): + with pytest.raises(TypeError, match='unorderable types'): self.index > 0 - with tm.assert_raises_regex(TypeError, 'unorderable types'): + with pytest.raises(TypeError, match='unorderable types'): self.index <= 0 with pytest.raises(TypeError): self.index > np.arange(2) @@ -1039,7 +1039,7 @@ def test_append(self, closed): for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: index_other_closed = IntervalIndex.from_arrays( [0, 1], [1, 2], closed=other_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): @@ -1148,7 +1148,7 @@ def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): diff --git a/pandas/tests/indexes/interval/test_interval_new.py b/pandas/tests/indexes/interval/test_interval_new.py index 
80905e13e9525..b4510f8f62bdf 100644 --- a/pandas/tests/indexes/interval/test_interval_new.py +++ b/pandas/tests/indexes/interval/test_interval_new.py @@ -199,7 +199,7 @@ def test_get_indexer_errors(self, tuples, closed): msg = ('cannot handle overlapping indices; use ' 'IntervalIndex.get_indexer_non_unique') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.get_indexer([0, 2]) @pytest.mark.parametrize('query, expected', [ diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 9e11c357c075d..87bbf53cd56e0 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -232,84 +232,84 @@ def test_errors(self): msg = ('Of the four parameters: start, end, periods, and freq, ' 'exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(periods=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0, end=5, periods=6, freq=1.5) # mixed units msg = 'start, end, freq need to be type compatible' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timedelta('1 day'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): 
interval_range(start=0, end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=Timedelta('1 day'), freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp('20130101'), end=Timestamp('20130110'), freq=2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta('1 day'), end=10, freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta('1 day'), end=Timestamp('20130110'), freq='D') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta('1 day'), end=Timedelta('10 days'), freq=2) # invalid periods msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=0, periods='foo') # invalid start msg = 'start must be numeric or datetime-like, got foo' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start='foo', periods=10) # invalid end msg = r'end must be numeric or datetime-like, got \(0, 1\]' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like msg = 'freq must be numeric or convertible to DateOffset, got foo' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(start=0, end=10, freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): 
interval_range(start=Timestamp('20130101'), periods=10, freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): interval_range(end=Timedelta('1 day'), periods=10, freq='foo') # mixed tz start = Timestamp('2017-01-01', tz='US/Eastern') end = Timestamp('2017-01-07', tz='US/Pacific') msg = 'Start and end cannot both be tz-aware with different timezones' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval_range(start=start, end=end) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 8d602b0bb2b1d..05adaada01ee5 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -77,8 +77,8 @@ def f(): def test_reorder_levels(idx): # this blows up - tm.assert_raises_regex(IndexError, '^Too many levels', - idx.reorder_levels, [2, 1, 0]) + with pytest.raises(IndexError, match='^Too many levels'): + idx.reorder_levels([2, 1, 0]) def test_numpy_repeat(): @@ -93,8 +93,8 @@ def test_numpy_repeat(): tm.assert_index_equal(np.repeat(m, reps), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, m, reps, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) def test_append_mixed_dtypes(): @@ -151,16 +151,16 @@ def test_take_invalid_kwargs(idx): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with 
pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') def test_take_fill_value(): @@ -195,9 +195,9 @@ def test_take_fill_value(): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py index 5da96717bc077..70d79ddfdc22e 100644 --- a/pandas/tests/indexes/multi/test_astype.py +++ b/pandas/tests/indexes/multi/test_astype.py @@ -3,7 +3,6 @@ import numpy as np import pytest -import pandas.util.testing as tm from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util.testing import assert_copy @@ -15,7 +14,7 @@ def test_astype(idx): assert_copy(actual.labels, expected.labels) assert [level.name for level in actual.levels] == list(expected.names) - with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"): + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): idx.astype(np.dtype(int)) @@ -23,10 +22,10 @@ def test_astype(idx): def test_astype_category(idx, ordered): # GH 18630 msg = '> 1 ndim Categorical are not supported at this time' - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.astype(CategoricalDtype(ordered=ordered)) if ordered is False: # dtype='category' defaults to ordered=False, so only test once - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.astype('category') diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py index f05b53522fa31..23ea0c306d47c 100644 --- 
a/pandas/tests/indexes/multi/test_compat.py +++ b/pandas/tests/indexes/multi/test_compat.py @@ -11,27 +11,34 @@ def test_numeric_compat(idx): - tm.assert_raises_regex(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assert_raises_regex(TypeError, "cannot perform __rmul__", - lambda: 1 * idx) - - div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) - div_err = div_err.replace(' __', ' __r') - tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) - tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assert_raises_regex(TypeError, "cannot perform __rfloordiv__", - lambda: 1 // idx) - - -def test_logical_compat(idx): - tm.assert_raises_regex(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assert_raises_regex(TypeError, 'cannot perform any', - lambda: idx.any()) + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = ("cannot perform __truediv__" if PY3 + else "cannot perform __div__") + with pytest.raises(TypeError, match=div_err): + idx / 1 + + div_err = div_err.replace(" __", " __r") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx + + +@pytest.mark.parametrize("method", ["all", "any"]) +def test_logical_compat(idx, method): + msg = "cannot perform {method}".format(method=method) + + with pytest.raises(TypeError, match=msg): + getattr(idx, method)() def test_boolean_context_compat(idx): diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 833de283e5367..fb15d674613d4 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ 
b/pandas/tests/indexes/multi/test_constructor.py @@ -25,13 +25,14 @@ def test_constructor_single_level(): def test_constructor_no_levels(): - tm.assert_raises_regex(ValueError, "non-zero number " - "of levels/labels", - MultiIndex, levels=[], labels=[]) + msg = "non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') - with tm.assert_raises_regex(TypeError, both_re): + with pytest.raises(TypeError, match=both_re): MultiIndex(levels=[]) - with tm.assert_raises_regex(TypeError, both_re): + with pytest.raises(TypeError, match=both_re): MultiIndex(labels=[]) @@ -39,44 +40,48 @@ def test_constructor_nonhashable_names(): # GH 20527 levels = [[1, 2], [u'one', u'two']] labels = [[0, 0, 1, 1], [0, 1, 0, 1]] - names = ((['foo'], ['bar'])) + names = (['foo'], ['bar']) message = "MultiIndex.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, - MultiIndex, levels=levels, - labels=labels, names=names) + with pytest.raises(TypeError, match=message): + MultiIndex(levels=levels, labels=labels, names=names) # With .rename() mi = MultiIndex(levels=[[1, 2], [u'one', u'two']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=('foo', 'bar')) renamed = [['foor'], ['barr']] - tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed) + with pytest.raises(TypeError, match=message): + mi.rename(names=renamed) + # With .set_names() - tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) + with pytest.raises(TypeError, match=message): + mi.set_names(names=renamed) def test_constructor_mismatched_label_levels(idx): labels = [np.array([1]), np.array([2]), np.array([3])] levels = ["a"] - tm.assert_raises_regex(ValueError, "Length of levels and labels " - "must be the same", MultiIndex, - levels=levels, labels=labels) + + msg = "Length of levels and labels must be the same" + with pytest.raises(ValueError, match=msg): + 
MultiIndex(levels=levels, labels=labels) + length_error = re.compile('>= length of level') label_error = re.compile(r'Unequal label lengths: \[4, 2\]') # important to check that it's looking at the right thing. - with tm.assert_raises_regex(ValueError, length_error): + with pytest.raises(ValueError, match=length_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - with tm.assert_raises_regex(ValueError, label_error): + with pytest.raises(ValueError, match=label_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) # external API - with tm.assert_raises_regex(ValueError, length_error): + with pytest.raises(ValueError, match=length_error): idx.copy().set_levels([['a'], ['b']]) - with tm.assert_raises_regex(ValueError, label_error): + with pytest.raises(ValueError, match=label_error): idx.copy().set_labels([[0, 0, 0, 0], [0, 0]]) @@ -121,8 +126,8 @@ def test_from_arrays_iterator(idx): tm.assert_index_equal(result, idx) # invalid iterator input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of array-likes."): + msg = "Input must be a list / sequence of array-likes." 
+ with pytest.raises(TypeError, match=msg): MultiIndex.from_arrays(0) @@ -217,8 +222,8 @@ def test_from_arrays_index_series_categorical(): def test_from_arrays_empty(): # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): + msg = "Must pass non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): MultiIndex.from_arrays(arrays=[]) # 1 level @@ -261,15 +266,15 @@ def test_from_arrays_invalid_input(invalid_array): ]) def test_from_arrays_different_lengths(idx1, idx2): # see gh-13599 - tm.assert_raises_regex(ValueError, '^all arrays must ' - 'be same length$', - MultiIndex.from_arrays, [idx1, idx2]) + msg = '^all arrays must be same length$' + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) def test_from_tuples(): - tm.assert_raises_regex(TypeError, 'Cannot infer number of levels ' - 'from empty list', - MultiIndex.from_tuples, []) + msg = 'Cannot infer number of levels from empty list' + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) expected = MultiIndex(levels=[[1, 3], [2, 4]], labels=[[0, 1], [0, 1]], @@ -291,8 +296,8 @@ def test_from_tuples_iterator(): tm.assert_index_equal(result, expected) # input non-iterables - with tm.assert_raises_regex( - TypeError, 'Input must be a list / sequence of tuple-likes.'): + msg = 'Input must be a list / sequence of tuple-likes.' 
+ with pytest.raises(TypeError, match=msg): MultiIndex.from_tuples(0) @@ -311,8 +316,8 @@ def test_from_tuples_index_values(idx): def test_from_product_empty_zero_levels(): # 0 levels - with tm.assert_raises_regex( - ValueError, "Must pass non-zero number of levels/labels"): + msg = "Must pass non-zero number of levels/labels" + with pytest.raises(ValueError, match=msg): MultiIndex.from_product([]) @@ -422,8 +427,8 @@ def test_from_product_iterator(): tm.assert_index_equal(result, expected) # Invalid non-iterable input - with tm.assert_raises_regex( - TypeError, "Input must be a list / sequence of iterables."): + msg = "Input must be a list / sequence of iterables." + with pytest.raises(TypeError, match=msg): MultiIndex.from_product(0) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 1daccefcfe876..79494a7c77cbd 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import numpy as np import pandas as pd @@ -51,11 +51,11 @@ def test_to_frame(): tm.assert_frame_equal(result, expected) msg = "'name' must be a list / sequence of column names." - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.to_frame(name='first') msg = "'name' should have same length as number of levels on index." 
- with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.to_frame(name=['first']) # Tests for datetime index diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index 41cb2409f0532..bd1f313897ea2 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- - import numpy as np +import pytest from pandas.compat import lrange, lzip, range @@ -35,7 +35,7 @@ def test_equals_op(idx): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -47,7 +47,7 @@ def test_equals_op(idx): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -57,23 +57,23 @@ def test_equals_op(idx): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with 
pytest.raises(ValueError, match="Lengths must match"): index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): index_a == array_d msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): series_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index e72b76ed07269..a5f586bd98d5f 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -26,8 +26,8 @@ def test_get_level_number_integer(idx): assert idx._get_level_number(1) == 0 assert idx._get_level_number(0) == 1 pytest.raises(IndexError, idx._get_level_number, 2) - tm.assert_raises_regex(KeyError, 'Level fourth not found', - idx._get_level_number, 'fourth') + with pytest.raises(KeyError, match='Level fourth not found'): + idx._get_level_number('fourth') def test_get_level_values(idx): @@ -125,7 +125,7 @@ def test_set_name_methods(idx, index_names): ind = idx.set_names(new_names) assert idx.names == index_names assert ind.names == new_names - with tm.assert_raises_regex(ValueError, "^Length"): + with pytest.raises(ValueError, match="^Length"): ind.set_names(new_names + new_names) new_names2 = [name + "SUFFIX2" for name in new_names] res = ind.set_names(new_names2, inplace=True) @@ -226,23 +226,23 @@ def test_set_levels(idx): # GH 13754 original_index = idx.copy() for inplace in [True, False]: - with tm.assert_raises_regex(ValueError, "^On"): + with pytest.raises(ValueError, match="^On"): idx.set_levels(['c'], level=0, inplace=inplace) assert_matching(idx.levels, original_index.levels, check_dtype=True) - 
with tm.assert_raises_regex(ValueError, "^On"): + with pytest.raises(ValueError, match="^On"): idx.set_labels([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) assert_matching(idx.labels, original_index.labels, check_dtype=True) - with tm.assert_raises_regex(TypeError, "^Levels"): + with pytest.raises(TypeError, match="^Levels"): idx.set_levels('c', level=0, inplace=inplace) assert_matching(idx.levels, original_index.levels, check_dtype=True) - with tm.assert_raises_regex(TypeError, "^Labels"): + with pytest.raises(TypeError, match="^Labels"): idx.set_labels(1, level=0, inplace=inplace) assert_matching(idx.labels, original_index.labels, check_dtype=True) @@ -323,46 +323,46 @@ def test_set_levels_labels_names_bad_input(idx): levels, labels = idx.levels, idx.labels names = idx.names - with tm.assert_raises_regex(ValueError, 'Length of levels'): + with pytest.raises(ValueError, match='Length of levels'): idx.set_levels([levels[0]]) - with tm.assert_raises_regex(ValueError, 'Length of labels'): + with pytest.raises(ValueError, match='Length of labels'): idx.set_labels([labels[0]]) - with tm.assert_raises_regex(ValueError, 'Length of names'): + with pytest.raises(ValueError, match='Length of names'): idx.set_names([names[0]]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_levels(levels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0]) # shouldn't scalar data error, instead should demand list-like - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_names(names[0]) # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of 
lists-like'): idx.set_levels(levels[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_levels(levels, level=0) # should have equal lengths - with tm.assert_raises_regex(TypeError, 'list of lists-like'): + with pytest.raises(TypeError, match='list of lists-like'): idx.set_labels(labels[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'list-like'): + with pytest.raises(TypeError, match='list-like'): idx.set_labels(labels, level=0) # should have equal lengths - with tm.assert_raises_regex(ValueError, 'Length of names'): + with pytest.raises(ValueError, match='Length of names'): idx.set_names(names[0], level=[0, 1]) - with tm.assert_raises_regex(TypeError, 'Names must be a'): + with pytest.raises(TypeError, match='Names must be a'): idx.set_names(names, level=0) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 563027364134d..23f48db751804 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -54,19 +54,17 @@ def test_slice_locs_with_type_mismatch(): df = tm.makeTimeDataFrame() stacked = df.stack() idx = stacked.index - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, (1, 3)) - tm.assert_raises_regex(TypeError, '^Level type mismatch', - idx.slice_locs, - df.index[5] + timedelta( - seconds=30), (5, 2)) + with pytest.raises(TypeError, match='^Level type mismatch'): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match='^Level type mismatch'): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) df = tm.makeCustomDataframe(5, 5) stacked = df.stack() idx = stacked.index - with tm.assert_raises_regex(TypeError, '^Level type mismatch'): + with pytest.raises(TypeError, match='^Level type mismatch'): idx.slice_locs(timedelta(seconds=30)) # TODO: Try creating a UnicodeDecodeError in exception message - with 
tm.assert_raises_regex(TypeError, '^Level type mismatch'): + with pytest.raises(TypeError, match='^Level type mismatch'): idx.slice_locs(df.index[1], (16, "a")) @@ -75,9 +73,9 @@ def test_slice_locs_not_sorted(): lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than " - "MultiIndex lexsort depth", - index.slice_locs, (1, 0, 1), (2, 1, 0)) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) # works sorted_index, _ = index.sortlevel(0) @@ -172,7 +170,7 @@ def test_get_indexer(): idx2 = Index(lrange(20)) msg = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, msg): + with pytest.raises(InvalidIndexError, match=msg): idx1.get_indexer(idx2) @@ -218,8 +216,8 @@ def test_get_indexer_consistency(idx): assert indexer.dtype == np.intp else: e = "Reindexing only valid with uniquely valued Index objects" - with tm.assert_raises_regex(InvalidIndexError, e): - indexer = idx.get_indexer(idx[0:2]) + with pytest.raises(InvalidIndexError, match=e): + idx.get_indexer(idx[0:2]) indexer, _ = idx.get_indexer_non_unique(idx[0:2]) assert isinstance(indexer, np.ndarray) diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 4d08fa7cef7a4..2ec08fa89d133 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -139,16 +139,16 @@ def take_invalid_kwargs(): indices = [1, 2] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with 
pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') def test_isna_behavior(idx): @@ -183,8 +183,8 @@ def test_million_record_attribute_error(): df = pd.DataFrame({'a': r, 'b': r}, index=pd.MultiIndex.from_tuples([(x, x) for x in r])) - with tm.assert_raises_regex(AttributeError, - "'Series' object has no attribute 'foo'"): + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): df['a'].foo() @@ -197,18 +197,18 @@ def test_metadata_immutable(idx): levels, labels = idx.levels, idx.labels # shouldn't be able to set at either the top level or base level mutable_regex = re.compile('does not support mutable operations') - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): levels[0] = levels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): levels[0][0] = levels[0][0] # ditto for labels - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): labels[0] = labels[0] - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): labels[0][0] = labels[0][0] # and for names names = idx.names - with tm.assert_raises_regex(TypeError, mutable_regex): + with pytest.raises(TypeError, match=mutable_regex): names[0] = names[0] @@ -248,8 +248,9 @@ def test_rangeindex_fallback_coercion_bug(): def test_hash_error(indices): index = indices - tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__, hash, indices) + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): + hash(indices) def test_mutability(indices): @@ -259,9 +260,8 @@ def 
test_mutability(indices): def test_wrong_number_names(indices): - def testit(ind): - ind.names = ["apple", "banana", "carrot"] - tm.assert_raises_regex(ValueError, "^Length", testit, indices) + with pytest.raises(ValueError, match="^Length"): + indices.names = ["apple", "banana", "carrot"] def test_memory_usage(idx): diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 8d89ad9f1cd0c..f50ee29ba31cd 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -46,8 +46,8 @@ def test_join_level_corner_case(idx): result = index.join(idx, level='second') assert isinstance(result, MultiIndex) - tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous", - idx.join, idx, level=1) + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) def test_join_self(idx, join_type): diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 7a91ac6d96220..73e6579cf7771 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -20,7 +20,7 @@ def test_fillna(idx): elif isinstance(index, MultiIndex): idx = index.copy() msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.fillna(idx[0]) else: idx = index.copy() @@ -29,7 +29,7 @@ def test_fillna(idx): assert result is not idx msg = "'value' must be a scalar, passed: " - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.fillna([idx[0]]) idx = index.copy() @@ -71,7 +71,7 @@ def test_dropna(): tm.assert_index_equal(idx.dropna(how='all'), exp) msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.dropna(how='xxx') @@ -80,7 +80,7 @@ def test_nulls(idx): # as these are adequately tested for function 
elsewhere msg = "isna is not defined for MultiIndex" - with tm.assert_raises_regex(NotImplementedError, msg): + with pytest.raises(NotImplementedError, match=msg): idx.isna() diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py index 68e8bb0cf58f2..1f63f1ef100c1 100644 --- a/pandas/tests/indexes/multi/test_names.py +++ b/pandas/tests/indexes/multi/test_names.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import pandas as pd import pandas.util.testing as tm from pandas import MultiIndex @@ -92,23 +92,22 @@ def test_names(idx, index_names): # setting bad names on existing index = idx - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", - list(index.names) + ["third"]) - tm.assert_raises_regex(ValueError, "^Length of names", - setattr, index, "names", []) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", list(index.names) + ["third"]) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", []) # initializing with bad names (should always be equivalent) major_axis, minor_axis = idx.levels major_labels, minor_labels = idx.labels - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) # names are assigned index.names = ["a", "b"] @@ -120,5 +119,5 @@ def test_names(idx, index_names): def 
test_duplicate_level_names_access_raises(idx): # GH19029 idx.names = ['foo', 'foo'] - tm.assert_raises_regex(ValueError, 'name foo occurs multiple times', - idx._get_level_number, 'foo') + with pytest.raises(ValueError, match='name foo occurs multiple times'): + idx._get_level_number('foo') diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index f7651ac258d48..049096ad92c76 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- - +import pytest import numpy as np import pandas as pd @@ -40,13 +40,11 @@ def test_reindex_level(idx): exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) - tm.assert_raises_regex(TypeError, "Fill method not supported", - idx.reindex, idx, - method='pad', level='second') + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method='pad', level='second') - tm.assert_raises_regex(TypeError, "Fill method not supported", - index.reindex, index, method='bfill', - level='first') + with pytest.raises(TypeError, match="Fill method not supported"): + index.reindex(index, method='bfill', level='first') def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): @@ -96,7 +94,7 @@ def test_reindex_base(idx): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): + with pytest.raises(ValueError, match='Invalid fill method'): idx.get_indexer(idx, method='invalid') @@ -104,6 +102,7 @@ def test_reindex_non_unique(): idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) a = pd.Series(np.arange(4), index=idx) new_idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) - with tm.assert_raises_regex(ValueError, - 'cannot handle a non-unique multi-index!'): + + msg = 'cannot handle a non-unique multi-index!' 
+ with pytest.raises(ValueError, match=msg): a.reindex(new_idx) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 7750379bff445..dd747a0283e45 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -27,7 +27,7 @@ def test_insert(idx): # key wrong length msg = "Item must have length equal to number of levels" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.insert(0, ('foo2',)) left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 46d7a27e02aec..34da3df4fb16e 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import pytest import numpy as np import pandas as pd @@ -7,18 +8,14 @@ from pandas import MultiIndex, Series -def test_setops_errorcases(idx): - # # non-iterable input - cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, - idx.symmetric_difference] - - for method in methods: - for case in cases: - tm.assert_raises_regex(TypeError, - "Input must be Index " - "or array-like", - method, case) +@pytest.mark.parametrize("case", [0.5, "xxx"]) +@pytest.mark.parametrize("method", ["intersection", "union", + "difference", "symmetric_difference"]) +def test_set_ops_error_cases(idx, case, method): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(idx, method)(case) def test_intersection_base(idx): @@ -36,8 +33,8 @@ def test_intersection_base(idx): assert tm.equalContents(result, second) msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.intersection([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3]) def 
test_union_base(idx): @@ -55,8 +52,8 @@ def test_union_base(idx): assert tm.equalContents(result, everything) msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.union([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3]) def test_difference_base(idx): @@ -75,8 +72,8 @@ def test_difference_base(idx): assert tm.equalContents(result, answer) msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): - result = first.difference([1, 2, 3]) + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3]) def test_symmetric_difference(idx): @@ -94,7 +91,7 @@ def test_symmetric_difference(idx): assert tm.equalContents(result, answer) msg = "other must be a MultiIndex or a list of tuples" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): first.symmetric_difference([1, 2, 3]) @@ -159,9 +156,10 @@ def test_difference(idx): 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) expected.names = first.names assert first.names == result.names - tm.assert_raises_regex(TypeError, "other must be a MultiIndex " - "or a list of tuples", - first.difference, [1, 2, 3, 4, 5]) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3, 4, 5]) def test_union(idx): diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 80e2b811ac062..7ad9b43e4c723 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -80,16 +80,16 @@ def test_numpy_argsort(idx): # backwards compatibility concerns if isinstance(type(idx), (CategoricalIndex, RangeIndex)): msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, - np.argsort, idx, axis=1) + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) msg = "the 'kind' 
parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - idx, kind='mergesort') + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind='mergesort') msg = "the 'order' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argsort, - idx, order=('a', 'b')) + with pytest.raises(ValueError, match=msg): + np.argsort(idx, order=('a', 'b')) def test_unsortedindex(): diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index f7c2bf3d6bf4f..3c384eed0a848 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -15,7 +15,7 @@ def test_astype_raises(self, dtype): # GH#13149, GH#13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') msg = 'Cannot cast PeriodArray to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.astype(dtype) def test_astype_conversion(self): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index fb74244d815c2..1ebc0ecb2fc02 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -199,7 +199,7 @@ def test_constructor_dtype(self): assert res.dtype == 'period[M]' msg = 'specified freq and dtype are different' - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(['2011-01'], freq='M', dtype='period[D]') def test_constructor_empty(self): @@ -208,7 +208,7 @@ def test_constructor_empty(self): assert len(idx) == 0 assert idx.freq == 'M' - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): pd.PeriodIndex([]) def test_constructor_pi_nat(self): @@ -234,35 +234,35 @@ def test_constructor_pi_nat(self): idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], 
freq='M') tm.assert_index_equal(idx, exp) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex([pd.NaT, pd.NaT]) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(np.array([pd.NaT, pd.NaT])) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(['NaT', 'NaT']) - with tm.assert_raises_regex(ValueError, 'freq not specified'): + with pytest.raises(ValueError, match='freq not specified'): PeriodIndex(np.array(['NaT', 'NaT'])) def test_constructor_incompat_freq(self): msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')]) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT, Period('2011-01', freq='D')])) # first element is pd.NaT - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')]) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'), Period('2011-01', freq='D')])) @@ -339,15 +339,15 @@ def test_constructor_freq_mult(self): msg = ('Frequency must be positive, because it' ' represents span: -1M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(['2011-01'], freq='-1M') msg = ('Frequency must be positive, because it' ' represents 
span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(['2011-01'], freq='0M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range('2011-01', periods=3, freq='0M') @pytest.mark.parametrize('freq', ['A', 'M', 'D', 'T', 'S']) @@ -442,12 +442,12 @@ def test_constructor_error(self): end_intv = Period('2006-12-31', ('w', 1)) msg = 'start and end must have same freq' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(start=start, end=end_intv) msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): PeriodIndex(start=start) @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 880e37c59c9c4..c92769311d848 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -101,10 +101,9 @@ def test_getitem_partial(self): tm.assert_series_equal(exp, result) ts = ts[10:].append(ts[10:]) - tm.assert_raises_regex(KeyError, - "left slice bound for non-unique " - "label: '2008'", - ts.__getitem__, slice('2008', '2009')) + msg = "left slice bound for non-unique label: '2008'" + with pytest.raises(KeyError, match=msg): + ts[slice('2008', '2009')] def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') @@ -313,9 +312,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, 
msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -362,9 +361,9 @@ def test_get_loc(self): assert idx0.get_loc(p2) == expected_idx1_p2 assert idx0.get_loc(str(p2)) == expected_idx1_p2 - tm.assert_raises_regex(KeyError, - "Cannot interpret 'foo' as period", - idx0.get_loc, 'foo') + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + idx0.get_loc('foo') pytest.raises(KeyError, idx0.get_loc, 1.1) pytest.raises(TypeError, idx0.get_loc, idx0) @@ -379,9 +378,10 @@ def test_get_loc(self): assert idx1.get_loc(p2) == expected_idx1_p2 assert idx1.get_loc(str(p2)) == expected_idx1_p2 - tm.assert_raises_regex(KeyError, - "Cannot interpret 'foo' as period", - idx1.get_loc, 'foo') + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + idx1.get_loc('foo') + pytest.raises(KeyError, idx1.get_loc, 1.1) pytest.raises(TypeError, idx1.get_loc, idx1) @@ -564,12 +564,13 @@ def test_get_loc2(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): + + msg = 'unit abbreviation w/o a number' + with pytest.raises(ValueError, match=msg): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodArray\\(freq=D\\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') @@ -599,7 +600,7 @@ def test_get_indexer2(self): np.array([0, -1, 1], dtype=np.intp)) msg = 'Input has different freq from PeriodArray\\(freq=H\\)' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.get_indexer(target, 'nearest', tolerance='1 minute') 
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index ede5256db2f1d..01347db4db3b2 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -65,17 +65,19 @@ def test_numpy_minmax(self): assert np.max(pr) == Period('2016-01-20', freq='D') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, pr, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(pr, out=0) assert np.argmin(pr) == 0 assert np.argmax(pr) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, pr, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(pr, out=0) def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index fcf1156266880..137a7be987d5b 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -41,12 +41,12 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, 
match='slice step cannot be zero'): + ts.loc[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 8d10cb8e42a94..ddb3fe686534a 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -92,8 +92,8 @@ def test_difference_freq(self): def test_hash_error(self): index = period_range('20010101', periods=10) - with tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__): + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): hash(index) def test_make_time_series(self): @@ -452,8 +452,8 @@ def test_numpy_repeat(self): tm.assert_index_equal(np.repeat(index, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex( - ValueError, msg, np.repeat, index, 2, axis=1) + with pytest.raises(ValueError, match=msg): + np.repeat(index, 2, axis=1) def test_pindex_multiples(self): pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') @@ -568,5 +568,5 @@ def test_maybe_convert_timedelta(): assert pi._maybe_convert_timedelta(2) == 2 offset = offsets.BusinessDay() - with tm.assert_raises_regex(ValueError, 'freq'): + with pytest.raises(ValueError, match='freq'): pi._maybe_convert_timedelta(offset) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 11d38df1dd49c..aa300111ba67a 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -65,31 +65,31 @@ def test_errors(self): # not enough params msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): 
period_range(start='2017Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(end='2017Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(periods=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') # start/end NaT msg = 'start and end must not be NaT' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start=NaT, end='2018Q1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): period_range(start='2017Q1', end=NaT) # invalid periods param msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index c4dd23b1708db..c8b7d82855519 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -106,7 +106,7 @@ def test_union_misc(self): index.union(index2) msg = 'can only call with other PeriodIndex-ed objects' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 8d09273bde63d..c8e1e6c1f3525 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -180,7 +180,7 @@ def test_to_period_monthish(self): assert prng.freq == 'M' msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range('01-Jan-2012', periods=8, freq='EOM') def test_period_dt64_round_trip(self): @@ -219,11 +219,11 @@ def test_searchsorted(self, freq): assert pidx.searchsorted(p2) == 3 msg = "Input has different freq=H from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period('2014-01-01', freq='H')) msg = "Input has different freq=5D from PeriodIndex" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) @@ -260,7 +260,7 @@ def test_to_timestamp_pi_nat(self): msg = ('Frequency must be positive, because it' ' represents span: -2A') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): result.to_period(freq='-2A') def test_to_timestamp_preserve_name(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 724dffc49dd3b..666420a6a9b06 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -267,7 +267,7 @@ def test_constructor_int_dtype_nan_raises(self, dtype): # see gh-15187 data = [np.nan] msg = "cannot convert" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index(data, dtype=dtype) @pytest.mark.parametrize("klass,dtype,na_val", [ @@ -464,29 +464,28 @@ def test_constructor_empty_special(self, empty, klass): assert isinstance(empty, klass) assert not len(empty) - def test_constructor_nonhashable_name(self, indices): + def test_constructor_non_hashable_name(self, indices): # GH 20527 if isinstance(indices, MultiIndex): pytest.skip("multiindex handled in test_multi.py") - name = ['0'] message = "Index.name must be a hashable type" - tm.assert_raises_regex(TypeError, message, name=name) + 
renamed = [['1']] # With .rename() - renamed = [['1']] - tm.assert_raises_regex(TypeError, message, - indices.rename, name=renamed) + with pytest.raises(TypeError, match=message): + indices.rename(name=renamed) + # With .set_names() - tm.assert_raises_regex(TypeError, message, - indices.set_names, names=renamed) + with pytest.raises(TypeError, match=message): + indices.set_names(names=renamed) def test_constructor_overflow_int64(self): # see gh-15832 msg = ("The elements provided in the data cannot " "all be casted to the dtype int64") - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Index([np.iinfo(np.uint64).max - 1], dtype="int64") @pytest.mark.xfail(reason="see GH#21311: Index " @@ -494,7 +493,7 @@ def test_constructor_overflow_int64(self): strict=True) def test_constructor_cast(self): msg = "could not convert string to float" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index(["a", "b", "c"], dtype=float) def test_view_with_args(self): @@ -1327,10 +1326,10 @@ def test_get_indexer_invalid(self): # GH10411 index = Index(np.arange(10)) - with tm.assert_raises_regex(ValueError, 'tolerance argument'): + with pytest.raises(ValueError, match='tolerance argument'): index.get_indexer([1, 0], tolerance=1) - with tm.assert_raises_regex(ValueError, 'limit argument'): + with pytest.raises(ValueError, match='limit argument'): index.get_indexer([1, 0], limit=1) @pytest.mark.parametrize( @@ -1378,7 +1377,7 @@ def test_get_indexer_nearest_listlike_tolerance(self, tolerance, def test_get_indexer_nearest_error(self): index = Index(np.arange(10)) - with tm.assert_raises_regex(ValueError, 'limit argument'): + with pytest.raises(ValueError, match='limit argument'): index.get_indexer([1, 0], method='nearest', limit=1) with pytest.raises(ValueError, match='tolerance size must match'): @@ -1465,7 +1464,7 @@ def test_get_loc_raises_bad_label(self, method): else: msg = 'invalid key' - with 
tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): index.get_loc([1, 2], method=method) @pytest.mark.parametrize("method,loc", [ @@ -1478,32 +1477,32 @@ def test_get_loc_tolerance(self, method, loc): @pytest.mark.parametrize("method", ['pad', 'backfill', 'nearest']) def test_get_loc_outside_tolerance_raises(self, method): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(KeyError, '1.1'): + with pytest.raises(KeyError, match='1.1'): index.get_loc(1.1, method, tolerance=0.05) def test_get_loc_bad_tolerance_raises(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'must be numeric'): + with pytest.raises(ValueError, match='must be numeric'): index.get_loc(1.1, 'nearest', tolerance='invalid') def test_get_loc_tolerance_no_method_raises(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): + with pytest.raises(ValueError, match='tolerance .* valid if'): index.get_loc(1.1, tolerance=1) def test_get_loc_raises_missized_tolerance(self): index = pd.Index([0, 1, 2]) - with tm.assert_raises_regex(ValueError, 'tolerance size must match'): + with pytest.raises(ValueError, match='tolerance size must match'): index.get_loc(1.1, 'nearest', tolerance=[1, 1]) def test_get_loc_raises_object_nearest(self): index = pd.Index(['a', 'c']) - with tm.assert_raises_regex(TypeError, 'unsupported operand type'): + with pytest.raises(TypeError, match='unsupported operand type'): index.get_loc('a', method='nearest') def test_get_loc_raises_object_tolerance(self): index = pd.Index(['a', 'c']) - with tm.assert_raises_regex(TypeError, 'unsupported operand type'): + with pytest.raises(TypeError, match='unsupported operand type'): index.get_loc('a', method='pad', tolerance='invalid') @pytest.mark.parametrize("dtype", [int, float]) @@ -1585,10 +1584,10 @@ def test_slice_locs_na(self): def test_slice_locs_na_raises(self): index = Index([np.nan, 1, 2]) - with 
tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.slice_locs(start=1.5) - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.slice_locs(end=1.5) @pytest.mark.parametrize("in_slice,expected", [ @@ -1627,7 +1626,7 @@ def test_drop_by_str_label(self): @pytest.mark.parametrize("keys", [['foo', 'bar'], ['1', 'bar']]) def test_drop_by_str_label_raises_missing_keys(self, keys): - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): self.strIndex.drop(keys) def test_drop_by_str_label_errors_ignore(self): @@ -1656,7 +1655,7 @@ def test_drop_by_numeric_label_loc(self): def test_drop_by_numeric_label_raises_missing_keys(self): index = Index([1, 2, 3]) - with tm.assert_raises_regex(KeyError, ''): + with pytest.raises(KeyError, match=''): index.drop([3, 4]) @pytest.mark.parametrize("key,expected", [ @@ -1789,7 +1788,7 @@ def test_isin_level_kwarg(self, level, index): # Float64Index overrides isin, so must be checked separately Float64Index([1.0, 2.0, 3.0, 4.0])]) def test_isin_level_kwarg_raises_bad_index(self, level, index): - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): index.isin([], level=level) @pytest.mark.parametrize("level", [1.0, 'foobar', 'xyzzy', np.nan]) @@ -1797,7 +1796,7 @@ def test_isin_level_kwarg_raises_bad_index(self, level, index): Index(['qux', 'baz', 'foo', 'bar']), Float64Index([1.0, 2.0, 3.0, 4.0])]) def test_isin_level_kwarg_raises_key(self, level, index): - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): index.isin([], level=level) @pytest.mark.parametrize("empty", [[], Series(), np.array([])]) @@ -1860,7 +1859,7 @@ def test_str_attribute(self, method): MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')]) def test_str_attribute_raises(self, index): 
- with tm.assert_raises_regex(AttributeError, 'only use .str accessor'): + with pytest.raises(AttributeError, match='only use .str accessor'): index.str.repeat(2) @pytest.mark.parametrize("expand,expected", [ @@ -1951,14 +1950,14 @@ def test_take_fill_value_none_raises(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): index.take(np.array([1, 0, -5]), fill_value=True) def test_take_bad_bounds_raises(self): index = pd.Index(list('ABC'), name='xxx') - with tm.assert_raises_regex(IndexError, 'out of bounds'): + with pytest.raises(IndexError, match='out of bounds'): index.take(np.array([1, -5])) @pytest.mark.parametrize("name", [None, 'foobar']) @@ -2032,7 +2031,7 @@ def test_equals_op_multiindex_identify(self): def test_equals_op_mismatched_multiindex_raises(self, index): df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): df.index == index def test_equals_op_index_vs_mi_same_length(self): @@ -2240,7 +2239,7 @@ def test_iadd_preserves_name(self): def test_cached_properties_not_settable(self): index = pd.Index([1, 2, 3]) - with tm.assert_raises_regex(AttributeError, "Can't set attribute"): + with pytest.raises(AttributeError, match="Can't set attribute"): index.is_unique = False def test_get_duplicates_deprecated(self): @@ -2277,10 +2276,10 @@ def create_index(self): def test_argsort(self): index = self.create_index() if PY36: - with tm.assert_raises_regex(TypeError, "'>|<' not supported"): + with pytest.raises(TypeError, match="'>|<' not supported"): result = index.argsort() elif PY3: - with tm.assert_raises_regex(TypeError, "unorderable types"): + with 
pytest.raises(TypeError, match="unorderable types"): result = index.argsort() else: result = index.argsort() @@ -2290,10 +2289,10 @@ def test_argsort(self): def test_numpy_argsort(self): index = self.create_index() if PY36: - with tm.assert_raises_regex(TypeError, "'>|<' not supported"): + with pytest.raises(TypeError, match="'>|<' not supported"): result = np.argsort(index) elif PY3: - with tm.assert_raises_regex(TypeError, "unorderable types"): + with pytest.raises(TypeError, match="unorderable types"): result = np.argsort(index) else: result = np.argsort(index) @@ -2462,7 +2461,7 @@ def test_dropna_dt_like(self, how, index, expected): def test_dropna_invalid_how_raises(self): msg = "invalid how option: xxx" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Index([1, 2, 3]).dropna(how='xxx') def test_get_combined_index(self): @@ -2586,7 +2585,7 @@ def test_generated_op_names(opname, indices): @pytest.mark.parametrize('index_maker', tm.index_subclass_makers_generator()) def test_index_subclass_constructor_wrong_kwargs(index_maker): # GH #19348 - with tm.assert_raises_regex(TypeError, 'unexpected keyword argument'): + with pytest.raises(TypeError, match='unexpected keyword argument'): index_maker(foo='bar') diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 5c4e4d2417957..6c5a70d76e3b5 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -342,7 +342,7 @@ def test_append(self): result = ci.append([]) tm.assert_index_equal(result, ci, exact=True) - # appending with different categories or reoreded is not ok + # appending with different categories or reordered is not ok pytest.raises( TypeError, lambda: ci.append(ci.values.set_categories(list('abcd')))) @@ -481,7 +481,7 @@ def test_reindex_base(self): actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, "Invalid fill 
method"): + with pytest.raises(ValueError, match="Invalid fill method"): idx.get_indexer(idx, method="invalid") def test_reindexing(self): @@ -758,7 +758,7 @@ def test_equals_categorical(self): assert (ci1 == ci1.values).all() # invalid comparisons - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with pytest.raises(ValueError, match="Lengths must match"): ci1 == Index(['a', 'b', 'c']) pytest.raises(TypeError, lambda: ci1 == ci2) pytest.raises( @@ -1000,8 +1000,8 @@ def test_fillna_categorical(self): tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - with tm.assert_raises_regex(ValueError, - 'fill value must be in categories'): + msg = 'fill value must be in categories' + with pytest.raises(ValueError, match=msg): idx.fillna(2.0) def test_take_fill_value(self): @@ -1055,9 +1055,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1093,9 +1093,9 @@ def test_take_fill_value_datetime(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -1106,16 +1106,16 @@ def test_take_invalid_kwargs(self): indices = [1, 0, -1] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, 
match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') @pytest.mark.parametrize('dtype, engine_type', [ (np.int8, libindex.Int8Engine), diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 8373cbc89149a..c125db16bcbff 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -232,7 +232,7 @@ def test_astype(self): def test_type_coercion_fail(self, any_int_dtype): # see gh-15832 msg = "Trying to coerce float values to integers" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Index([1, 2, 3.5], dtype=any_int_dtype) def test_type_coercion_valid(self, float_dtype): @@ -288,7 +288,7 @@ def test_get_loc(self): pytest.raises(KeyError, idx.get_loc, True) pytest.raises(KeyError, idx.get_loc, False) - with tm.assert_raises_regex(ValueError, 'must be numeric'): + with pytest.raises(ValueError, match='must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') with pytest.raises(ValueError, match='must contain numeric elements'): @@ -393,9 +393,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -540,7 +540,7 @@ def test_take_fill_value(self): "{name} cannot contain 
NA").format(name=name) # fill_value=True - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -549,9 +549,9 @@ def test_take_fill_value(self): expected = self._holder([2, 1, 3], name='xxx') tm.assert_index_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -613,11 +613,11 @@ def test_constructor_corner(self): # preventing casting arr = np.array([1, '2', 3, '4'], dtype=object) - with tm.assert_raises_regex(TypeError, 'casting'): + with pytest.raises(TypeError, match='casting'): Int64Index(arr) arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with tm.assert_raises_regex(TypeError, 'casting'): + with pytest.raises(TypeError, match='casting'): Int64Index(arr_with_floats) def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): @@ -625,7 +625,7 @@ def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): # see gh-15832 msg = "Trying to coerce negative values to unsigned integers" - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Index([-1], dtype=uint_dtype) def test_coerce_list(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index efea9b58ecb7a..d0f8768456bc5 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -63,11 +63,9 @@ def test_binops_pow(self): self.check_binop(ops, scalars, idxs) def test_too_many_names(self): - def testit(): + with pytest.raises(ValueError, match="^Length"): self.index.names = ["roger", "harold"] - tm.assert_raises_regex(ValueError, "^Length", testit) - def test_constructor(self): index = 
RangeIndex(5) expected = np.arange(5, dtype=np.int64) @@ -91,7 +89,7 @@ def test_constructor(self): tm.assert_index_equal(Index(expected), index) msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): RangeIndex() for index in [RangeIndex(0), RangeIndex(start=0), RangeIndex(stop=0), @@ -103,7 +101,7 @@ def test_constructor(self): assert index._step == 1 tm.assert_index_equal(Index(expected), index) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): RangeIndex(name='Foo') for index in [RangeIndex(0, name='Foo'), @@ -765,7 +763,7 @@ def test_take_fill_value(self): # fill_value msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -1]), fill_value=True) # allow_fill=False @@ -775,9 +773,9 @@ def test_take_fill_value(self): tm.assert_index_equal(result, expected) msg = "Unable to fill values because RangeIndex cannot contain NA" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 54f1ac601fd69..1a0481b730618 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -75,5 +75,5 @@ def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) msg = 'Cannot cast TimedeltaIndex to dtype' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): idx.astype(dtype) diff --git 
a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index a5cfad98b31c1..1abda624777c8 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -57,7 +57,7 @@ def test_constructor_coverage(self): tm.assert_index_equal(rng, exp) msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index e0e932efafd55..bfed4114929b7 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -101,16 +101,16 @@ def test_take_invalid_kwargs(self): indices = [1, 6, 5, 9, 10, 13, 15, 3] msg = r"take\(\) got an unexpected keyword argument 'foo'" - tm.assert_raises_regex(TypeError, msg, idx.take, - indices, foo=2) + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, out=indices) + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, idx.take, - indices, mode='clip') + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode='clip') # TODO: This method came from test_timedelta; de-dup with version above def test_take2(self): @@ -151,9 +151,9 @@ def test_take_fill_value(self): msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assert_raises_regex(ValueError, msg): + with 
pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): @@ -239,8 +239,8 @@ def test_delete(self): assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) + # either depending on numpy version + idx.delete(5) def test_delete_slice(self): idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') @@ -285,8 +285,7 @@ def test_get_loc(self): assert idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)) == 1 - with tm.assert_raises_regex(ValueError, - 'unit abbreviation w/o a number'): + with pytest.raises(ValueError, match='unit abbreviation w/o a number'): idx.get_loc(idx[1], method='nearest', tolerance='foo') with pytest.raises( diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index a8cfdd0add178..2fc0a49d789fd 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -61,17 +61,19 @@ def test_numpy_minmax(self): assert np.max(td) == Timedelta('16820 days') errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0) - tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.min(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(td, out=0) assert np.argmin(td) == 0 assert np.argmax(td) == 5 errmsg = "the 'out' parameter is not supported" - tm.assert_raises_regex( - ValueError, errmsg, np.argmin, td, out=0) - tm.assert_raises_regex( - ValueError, errmsg, np.argmax, td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmin(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(td, out=0) def test_value_counts_unique(self): # GH 7735 @@ -317,16 +319,16 @@ def test_freq_setter_errors(self): # setting with an incompatible freq msg = ('Inferred frequency 2D 
from passed values does not conform to ' 'passed frequency 5D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '5D' # setting with a non-fixed frequency msg = r'<2 \* BusinessDays> is a non-fixed frequency' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): idx.freq = '2B' # setting with non-freq string - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): idx.freq = 'foo' diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 4dfce3dbe23a6..62bf2a0b4a1cf 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -2,7 +2,6 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas import Series, Timedelta, timedelta_range from pandas.util.testing import assert_series_equal @@ -78,9 +77,9 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: ts.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + ts.loc[::0] diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index b1d8a12943dca..abd08e37681dd 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py 
@@ -3,6 +3,7 @@ Tests for TimedeltaIndex methods behaving like their Timedelta counterparts """ +import pytest import numpy as np import pandas as pd @@ -51,13 +52,13 @@ def test_tdi_round(self): assert elt.round(freq='H') == expected_elt msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): td.round(freq='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='foo') msg = " is a non-fixed frequency" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): td.round(freq='M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): elt.round(freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 9bc2e93f8468c..1d068971fad2d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -199,8 +199,8 @@ def test_pickle(self): def test_hash_error(self): index = timedelta_range('1 days', periods=10) - with tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(index).__name__): + with pytest.raises(TypeError, match=("unhashable type: %r" % + type(index).__name__)): hash(index) def test_append_join_nondatetimeindex(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index e77c03465d047..238fd861a92ab 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -36,10 +36,10 @@ def test_timedelta_range(self): arr = np.arange(10).reshape(2, 5) df = pd.DataFrame(np.arange(10).reshape(2, 5)) for arg in (arr, df): - with tm.assert_raises_regex(TypeError, "1-d array"): + with pytest.raises(TypeError, match="1-d array"): to_timedelta(arg) for errors in 
['ignore', 'raise', 'coerce']: - with tm.assert_raises_regex(TypeError, "1-d array"): + with pytest.raises(TypeError, match="1-d array"): to_timedelta(arg, errors=errors) # issue10583 @@ -65,18 +65,18 @@ def test_errors(self): # not enough params msg = ('Of the four parameters: start, end, periods, and freq, ' 'exactly three must be specified') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(start='0 days') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(end='5 days') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(periods=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range() # too many params - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): timedelta_range(start='0 days', end='5 days', periods=10, freq='H') diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 95a77f1b7fe44..b56dd3cababb9 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -110,8 +110,8 @@ def test_to_timedelta_invalid(self): # bad value for errors parameter msg = "errors must be one of" - tm.assert_raises_regex(ValueError, msg, to_timedelta, - ['foo'], errors='never') + with pytest.raises(ValueError, match=msg): + to_timedelta(['foo'], errors='never') # these will error pytest.raises(ValueError, lambda: to_timedelta([1, 2], unit='foo')) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 3a235e1eeb0dc..b7443e242137b 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -362,10 +362,9 @@ def test_loc_listlike_dtypes(self): exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index) 
tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values that are ' - 'in the categories'): + msg = ('a list-indexer must only include ' + 'values that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] # duplicated categories and codes @@ -387,10 +386,9 @@ def test_loc_listlike_dtypes(self): ]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): + msg = ('a list-indexer must only include values ' + 'that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] # contains unused category @@ -417,10 +415,9 @@ def test_loc_listlike_dtypes(self): categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assert_raises_regex( - KeyError, - 'a list-indexer must only include values ' - 'that are in the categories'): + msg = ('a list-indexer must only include values ' + 'that are in the categories') + with pytest.raises(KeyError, match=msg): df.loc[['a', 'x']] def test_get_indexer_array(self): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 65110d4955294..2bc3aefcf7eb1 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -374,14 +374,14 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype): msg = "Passed item and index have different timezone" if fill_val.tz: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): obj.insert(1, pd.Timestamp('2012-01-01')) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo')) msg = "cannot insert DatetimeIndex with incompatible label" - with 
tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj.insert(1, 1) pytest.xfail("ToDo: must coerce to object") @@ -397,12 +397,12 @@ def test_insert_index_timedelta64(self): # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj.insert(1, pd.Timestamp('2012-01-01')) # ToDo: must coerce to object msg = "cannot insert TimedeltaIndex with incompatible label" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj.insert(1, 1) @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [ @@ -603,7 +603,7 @@ def test_where_index_datetime(self): msg = ("Index\\(\\.\\.\\.\\) must be called with a collection " "of some kind") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj.where(cond, fill_val) values = pd.Index(pd.date_range(fill_val, periods=4)) @@ -628,7 +628,7 @@ def test_where_index_datetimetz(self): msg = ("Index\\(\\.\\.\\.\\) must be called with a collection " "of some kind") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): obj.where(cond, fill_val) values = pd.Index(pd.date_range(fill_val, periods=4)) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 0a55b3f67dd3f..de91b8f4a796c 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -50,11 +50,9 @@ def test_scalar_error(self): s = Series(np.arange(len(i)), index=i) - def f(): + msg = 'Cannot index by location index' + with pytest.raises(TypeError, match=msg): s.iloc[3.0] - tm.assert_raises_regex(TypeError, - 'Cannot index by location index', - f) def f(): s.iloc[3.0] = 0 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6d0b516d8ebf0..53d07aeef304a 100644 --- a/pandas/tests/indexing/test_iloc.py +++ 
b/pandas/tests/indexing/test_iloc.py @@ -21,12 +21,10 @@ def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) - expected = df # lists of positions should raise IndexErrror! - with tm.assert_raises_regex(IndexError, - 'positional indexers ' - 'are out-of-bounds'): + msg = 'positional indexers are out-of-bounds' + with pytest.raises(IndexError, match=msg): df.iloc[:, [0, 1, 2, 3, 4, 5]] pytest.raises(IndexError, lambda: df.iloc[[1, 30]]) pytest.raises(IndexError, lambda: df.iloc[[1, -30]]) @@ -38,14 +36,14 @@ def test_iloc_exceeds_bounds(self): # still raise on a single indexer msg = 'single positional indexer is out-of-bounds' - with tm.assert_raises_regex(IndexError, msg): + with pytest.raises(IndexError, match=msg): df.iloc[30] pytest.raises(IndexError, lambda: df.iloc[-30]) # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError - with tm.assert_raises_regex(IndexError, msg): + with pytest.raises(IndexError, match=msg): s.iloc[30] pytest.raises(IndexError, lambda: s.iloc[-30]) @@ -136,8 +134,8 @@ def test_iloc_getitem_invalid_scalar(self, dims): else: s = DataFrame(np.arange(100).reshape(10, 10)) - tm.assert_raises_regex(TypeError, 'Cannot index by location index', - lambda: s.iloc['a']) + with pytest.raises(TypeError, match='Cannot index by location index'): + s.iloc['a'] def test_iloc_array_not_mutating_negative_indices(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 3b95ba8e4b9d8..4236a80bc98f1 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -837,15 +837,14 @@ def assert_slices_equivalent(l_slc, i_slc): def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) - tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: s[::0]) - 
tm.assert_raises_regex(ValueError, 'slice step cannot be zero', - lambda: s.loc[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + s[::0] + with pytest.raises(ValueError, match='slice step cannot be zero'): + s.loc[::0] with catch_warnings(record=True): simplefilter("ignore") - tm.assert_raises_regex(ValueError, - 'slice step cannot be zero', - lambda: s.ix[::0]) + with pytest.raises(ValueError, match='slice step cannot be zero'): + s.ix[::0] def test_indexing_assignment_dict_already_exists(self): df = DataFrame({'x': [1, 2, 6], @@ -1062,18 +1061,18 @@ def test_validate_indices_ok(): def test_validate_indices_low(): indices = np.asarray([0, -2]) - with tm.assert_raises_regex(ValueError, "'indices' contains"): + with pytest.raises(ValueError, match="'indices' contains"): validate_indices(indices, 2) def test_validate_indices_high(): indices = np.asarray([0, 1, 2]) - with tm.assert_raises_regex(IndexError, "indices are out"): + with pytest.raises(IndexError, match="indices are out"): validate_indices(indices, 2) def test_validate_indices_empty(): - with tm.assert_raises_regex(IndexError, "indices are out"): + with pytest.raises(IndexError, match="indices are out"): validate_indices(np.array([0, 1]), 0) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index dcf148f199d52..ea17844a75033 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -308,9 +308,9 @@ def test_getitem_partial_int(self): tm.assert_frame_equal(result, expected) # missing item: - with tm.assert_raises_regex(KeyError, '1'): + with pytest.raises(KeyError, match='1'): df[1] - with tm.assert_raises_regex(KeyError, r"'\[1\] not in index'"): + with pytest.raises(KeyError, match=r"'\[1\] not in index'"): df[[1]] def test_loc_multiindex_indexer_none(self): @@ -851,10 +851,10 @@ def f(): assert df.index.lexsort_depth == 2 df = df.sort_index(level=1, axis=0) assert df.index.lexsort_depth 
== 0 - with tm.assert_raises_regex( - UnsortedIndexError, - 'MultiIndex slicing requires the index to be ' - r'lexsorted: slicing on levels \[1\], lexsort depth 0'): + + msg = ('MultiIndex slicing requires the index to be ' + r'lexsorted: slicing on levels \[1\], lexsort depth 0') + with pytest.raises(UnsortedIndexError, match=msg): df.loc[(slice(None), slice('bar')), :] # GH 16734: not sorted, but no real slicing diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index d45209fd277f1..fbbfdfefb67e6 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -151,8 +151,8 @@ def test_at_to_fail(self): df.columns = ['x', 'x', 'z'] # Check that we get the correct value in the KeyError - tm.assert_raises_regex(KeyError, r"\['y'\] not in index", - lambda: df[['x', 'y', 'z']]) + with pytest.raises(KeyError, match=r"\['y'\] not in index"): + df[['x', 'y', 'z']] def test_at_with_tz(self): # gh-15822 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b327b158adc24..97790920d46f7 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1055,8 +1055,8 @@ def test_zero_step_raises(self): def test_unbounded_slice_raises(self): def assert_unbounded_slice_error(slc): - tm.assert_raises_regex(ValueError, "unbounded slice", - lambda: BlockPlacement(slc)) + with pytest.raises(ValueError, match="unbounded slice"): + BlockPlacement(slc) assert_unbounded_slice_error(slice(None, None)) assert_unbounded_slice_error(slice(10, None)) @@ -1247,7 +1247,7 @@ def test_binop_other(self, op, value, dtype): if (op, dtype) in invalid: with pytest.raises(TypeError): - result = op(s, e.value) + op(s, e.value) else: # FIXME: Since dispatching to Series, this test no longer # asserts anything meaningful @@ -1281,5 +1281,5 @@ def test_validate_ndim(): placement = slice(2) msg = r"Wrong number of dimensions. 
values.ndim != ndim \[1 != 2\]" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): make_block(values, placement, ndim=2) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index e407573c9a462..6027fc08624df 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1209,7 +1209,7 @@ def test_text_color_threshold(self, c_map, expected): def test_text_color_threshold_raises(self, text_color_threshold): df = pd.DataFrame([[1, 2], [2, 4]], columns=['A', 'B']) msg = "`text_color_threshold` must be a value from 0 to 1." - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.style.background_gradient( text_color_threshold=text_color_threshold)._compute() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 228373a7bf545..3792da4b29ef9 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -54,7 +54,7 @@ def test_to_csv_defualt_encoding(self): # Python 3 is uft-8. 
if pd.compat.PY2: # the encoding argument parameter should be utf-8 - with tm.assert_raises_regex(UnicodeEncodeError, 'ascii'): + with pytest.raises(UnicodeEncodeError, match='ascii'): df.to_csv(path) else: df.to_csv(path) @@ -85,7 +85,7 @@ def test_to_csv_quotechar(self): assert f.read() == expected with tm.ensure_clean('test.csv') as path: - with tm.assert_raises_regex(TypeError, 'quotechar'): + with pytest.raises(TypeError, match='quotechar'): df.to_csv(path, quoting=1, quotechar=None) def test_to_csv_doublequote(self): @@ -103,7 +103,7 @@ def test_to_csv_doublequote(self): from _csv import Error with tm.ensure_clean('test.csv') as path: - with tm.assert_raises_regex(Error, 'escapechar'): + with pytest.raises(Error, match='escapechar'): df.to_csv(path, doublequote=False) # no escapechar set def test_to_csv_escapechar(self): diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 035b2d4c3347c..0416cf6da7912 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1580,7 +1580,7 @@ def test_to_html_invalid_justify(self, justify): df = DataFrame() msg = "Invalid value for justify parameter" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df.to_html(justify=justify) def test_to_html_index(self): diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index b411744f7bac2..c50b6f68b8839 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -3,7 +3,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.util._test_decorators as td -from pandas.util.testing import assert_frame_equal, assert_raises_regex +from pandas.util.testing import assert_frame_equal def test_compression_roundtrip(compression): @@ -81,15 +81,15 @@ def test_write_unsupported_compression_type(): df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') with 
tm.ensure_clean() as path: msg = "Unrecognized compression type: unsupported" - assert_raises_regex(ValueError, msg, df.to_json, - path, compression="unsupported") + with pytest.raises(ValueError, match=msg): + df.to_json(path, compression="unsupported") def test_read_unsupported_compression_type(): with tm.ensure_clean() as path: msg = "Unrecognized compression type: unsupported" - assert_raises_regex(ValueError, msg, pd.read_json, - path, compression="unsupported") + with pytest.raises(ValueError, match=msg): + pd.read_json(path, compression="unsupported") @pytest.mark.parametrize("to_infer", [True, False]) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 829953c144caa..0b4ff2c34297a 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -409,8 +409,8 @@ def test_convert_json_field_to_pandas_type(self, inp, exp): @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"]) def test_convert_json_field_to_pandas_type_raises(self, inp): field = {'type': inp} - with tm.assert_raises_regex(ValueError, "Unsupported or invalid field " - "type: {}".format(inp)): + with pytest.raises(ValueError, match=("Unsupported or invalid field " + "type: {}".format(inp))): convert_json_field_to_pandas_type(field) def test_categorical(self): @@ -480,7 +480,7 @@ def test_timestamp_in_columns(self): ['a'], [1]], names=["A", "a"])) ]) def test_overlapping_names(self, case): - with tm.assert_raises_regex(ValueError, 'Overlapping'): + with pytest.raises(ValueError, match='Overlapping'): case.to_json(orient='table') def test_mi_falsey_name(self): @@ -526,7 +526,7 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") - with tm.assert_raises_regex(NotImplementedError, 'can not yet 
read '): + with pytest.raises(NotImplementedError, match='can not yet read '): pd.read_json(out, orient="table") def test_comprehensive(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 04f0220839523..d047970ce2f08 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -344,8 +344,7 @@ def test_frame_from_json_bad_data(self): json = StringIO('{"badkey":["A","B"],' '"index":["2","3"],' '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}') - with tm.assert_raises_regex(ValueError, - r"unexpected key\(s\): badkey"): + with pytest.raises(ValueError, match=r"unexpected key\(s\): badkey"): read_json(json, orient="split") def test_frame_from_json_nones(self): @@ -839,7 +838,7 @@ def test_misc_example(self): DataFrame\\.index values are different \\(100\\.0 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) \\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" - with tm.assert_raises_regex(AssertionError, error_msg): + with pytest.raises(AssertionError, match=error_msg): assert_frame_equal(result, expected, check_index_type=False) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') @@ -1122,9 +1121,7 @@ def test_to_jsonl(self): def test_latin_encoding(self): if compat.PY2: - tm.assert_raises_regex( - TypeError, r'\[unicode\] is not implemented as a table column') - return + pytest.skip("[unicode] is not implemented as a table column") # GH 13774 pytest.skip("encoding not implemented in .to_json(), " @@ -1229,7 +1226,7 @@ def test_index_false_error_to_json(self, orient): df = pd.DataFrame([[1, 2], [4, 5]], columns=['a', 'b']) - with tm.assert_raises_regex(ValueError, "'index=False' is only " - "valid when 'orient' is " - "'split' or 'table'"): + msg = ("'index=False' is only valid when " + "'orient' is 'split' or 'table'") + with pytest.raises(ValueError, match=msg): df.to_json(orient=orient, index=False) diff --git a/pandas/tests/io/json/test_readlines.py 
b/pandas/tests/io/json/test_readlines.py index 3f61f702b7c9c..25750f4fd23b5 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -81,7 +81,7 @@ def test_readjson_chunks(lines_json_df, chunksize): def test_readjson_chunksize_requires_lines(lines_json_df): msg = "chunksize can only be passed if lines=True" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2) @@ -138,7 +138,7 @@ def test_readjson_chunks_closes(chunksize): def test_readjson_invalid_chunksize(lines_json_df, chunksize): msg = r"'chunksize' must be an integer >=1" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 6706a29e78ae8..4ad4f71791079 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -707,7 +707,7 @@ def my_handler(_): def my_handler_raises(_): raise TypeError("I raise for anything") - with tm.assert_raises_regex(TypeError, "I raise for anything"): + with pytest.raises(TypeError, match="I raise for anything"): ujson.encode(_TestObject("foo"), default_handler=my_handler_raises) def my_int_handler(_): diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py index 5a803c5eba34b..8e8d43a16eee9 100644 --- a/pandas/tests/io/msgpack/test_except.py +++ b/pandas/tests/io/msgpack/test_except.py @@ -4,7 +4,6 @@ from pandas.io.msgpack import packb, unpackb import pytest -import pandas.util.testing as tm class DummyException(Exception): @@ -15,7 +14,7 @@ class TestExceptions(object): def test_raise_on_find_unsupported_value(self): msg = "can\'t serialize datetime" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): packb(datetime.now()) def 
test_raise_from_object_hook(self): @@ -35,5 +34,5 @@ def hook(_): def test_invalid_value(self): msg = "Unpack failed: error" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpackb(b"\xd9\x97#DL_") diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py index e4abd4ddb8d13..2d759d6117f2a 100644 --- a/pandas/tests/io/msgpack/test_limits.py +++ b/pandas/tests/io/msgpack/test_limits.py @@ -4,7 +4,6 @@ from pandas.io.msgpack import packb, unpackb, Packer, Unpacker, ExtType import pytest -import pandas.util.testing as tm class TestLimits(object): @@ -41,7 +40,7 @@ def test_max_str_len(self): unpacker.feed(packed) msg = "3 exceeds max_str_len" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpacker.unpack() def test_max_bin_len(self): @@ -56,7 +55,7 @@ def test_max_bin_len(self): unpacker.feed(packed) msg = "3 exceeds max_bin_len" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpacker.unpack() def test_max_array_len(self): @@ -71,7 +70,7 @@ def test_max_array_len(self): unpacker.feed(packed) msg = "3 exceeds max_array_len" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpacker.unpack() def test_max_map_len(self): @@ -86,7 +85,7 @@ def test_max_map_len(self): unpacker.feed(packed) msg = "3 exceeds max_map_len" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpacker.unpack() def test_max_ext_len(self): @@ -101,5 +100,5 @@ def test_max_ext_len(self): unpacker.feed(packed) msg = "4 exceeds max_ext_len" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): unpacker.unpack() diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py index dc6fc5ef916b4..be0a23f60f18a 100644 --- a/pandas/tests/io/msgpack/test_sequnpack.py +++ 
b/pandas/tests/io/msgpack/test_sequnpack.py @@ -5,7 +5,6 @@ from pandas.io.msgpack import OutOfData import pytest -import pandas.util.testing as tm class TestPack(object): @@ -16,7 +15,7 @@ def test_partial_data(self): for data in [b"\xa5", b"h", b"a", b"l", b"l"]: unpacker.feed(data) - with tm.assert_raises_regex(StopIteration, msg): + with pytest.raises(StopIteration, match=msg): next(iter(unpacker)) unpacker.feed(b"o") diff --git a/pandas/tests/io/parser/c_parser_only.py b/pandas/tests/io/parser/c_parser_only.py index edcfe1c0768cd..88db1080642c5 100644 --- a/pandas/tests/io/parser/c_parser_only.py +++ b/pandas/tests/io/parser/c_parser_only.py @@ -7,6 +7,7 @@ further arguments when parsing. """ +from io import TextIOWrapper import os import sys import tarfile @@ -14,7 +15,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, lrange, range +from pandas.compat import PY3, BytesIO, StringIO, lrange, range import pandas.util._test_decorators as td import pandas as pd @@ -34,9 +35,8 @@ def test_buffer_overflow(self, malf): # see gh-9205: test certain malformed input files that cause # buffer overflows in tokenizer.c cperr = 'Buffer overflow caught - possible malformed input file.' 
- with pytest.raises(pd.errors.ParserError) as excinfo: + with pytest.raises(pd.errors.ParserError, match=cperr): self.read_table(StringIO(malf)) - assert cperr in str(excinfo.value) def test_buffer_rd_bytes(self): # see gh-12098: src->buffer in the C parser can be freed twice leading @@ -99,7 +99,7 @@ def test_dtype_and_names_error(self): 3.0 3 """ # fallback casting, but not castable - with tm.assert_raises_regex(ValueError, 'cannot safely convert'): + with pytest.raises(ValueError, match='cannot safely convert'): self.read_csv(StringIO(data), sep=r'\s+', header=None, names=['a', 'b'], dtype={'a': np.int32}) @@ -455,6 +455,14 @@ def __next__(self): tm.assert_frame_equal(result, expected) + def test_buffer_rd_bytes_bad_unicode(self): + # see gh-22748 + t = BytesIO(b"\xB0") + if PY3: + t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape') + with pytest.raises(UnicodeError): + self.read_csv(t, encoding='UTF-8') + @pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) def test_read_tarfile(self, tar_suffix): # see gh-16530 diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index da8118ef3e123..18690a18f7cb3 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -4,7 +4,6 @@ from collections import OrderedDict import csv from datetime import datetime -from io import TextIOWrapper import os import platform import re @@ -45,7 +44,7 @@ def test_empty_decimal_marker(self): """ # Parsers support only length-1 decimals msg = 'Only length-1 decimal markers supported' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), decimal='') def test_bad_stream_exception(self): @@ -67,7 +66,7 @@ def test_bad_stream_exception(self): handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter) as stream: - with tm.assert_raises_regex(UnicodeDecodeError, msg): + with pytest.raises(UnicodeDecodeError, match=msg): 
self.read_csv(stream) def test_read_csv(self): @@ -128,7 +127,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 4, saw 5' - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): self.read_table(StringIO(data), sep=',', header=1, comment='#') @@ -142,7 +141,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, @@ -159,7 +158,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2]) @@ -175,7 +174,7 @@ def test_malformed(self): 2,3,4 """ msg = 'Expected 3 fields in line 6, saw 5' - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): it = self.read_table(StringIO(data), sep=',', header=1, comment='#', iterator=True, chunksize=1, skiprows=[2]) @@ -192,7 +191,7 @@ def test_malformed(self): footer """ msg = 'Expected 3 fields in line 4, saw 5' - with tm.assert_raises_regex(Exception, msg): + with pytest.raises(Exception, match=msg): self.read_table(StringIO(data), sep=',', header=1, comment='#', skipfooter=1) @@ -367,13 +366,13 @@ def test_read_nrows(self): msg = r"'nrows' must be an integer >=0" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), nrows=1.2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), nrows='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), nrows=-1) def test_read_chunksize(self): 
@@ -389,13 +388,13 @@ def test_read_chunksize(self): # with invalid chunksize value: msg = r"'chunksize' must be an integer >=1" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), chunksize=1.3) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), chunksize='foo') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(self.data1), chunksize=0) def test_read_chunksize_and_nrows(self): @@ -1081,7 +1080,7 @@ def test_uneven_lines_with_usecols(self): # make sure that an error is still thrown # when the 'usecols' parameter is not provided msg = r"Expected \d+ fields in line \d+, saw \d+" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): df = self.read_csv(StringIO(csv)) expected = DataFrame({ @@ -1107,10 +1106,10 @@ def test_read_empty_with_usecols(self): # throws the correct error, with or without usecols errmsg = "No columns to parse from file" - with tm.assert_raises_regex(EmptyDataError, errmsg): + with pytest.raises(EmptyDataError, match=errmsg): self.read_csv(StringIO('')) - with tm.assert_raises_regex(EmptyDataError, errmsg): + with pytest.raises(EmptyDataError, match=errmsg): self.read_csv(StringIO(''), usecols=usecols) expected = DataFrame(columns=usecols, index=[0], dtype=np.float64) @@ -1149,8 +1148,7 @@ def test_trailing_spaces(self): def test_raise_on_sep_with_delim_whitespace(self): # see gh-6607 data = 'a b c\n1 2 3' - with tm.assert_raises_regex(ValueError, - 'you can only specify one'): + with pytest.raises(ValueError, match='you can only specify one'): self.read_table(StringIO(data), sep=r'\s', delim_whitespace=True) def test_single_char_leading_whitespace(self): @@ -1395,7 +1393,7 @@ def test_null_byte_char(self): tm.assert_frame_equal(out, expected) else: msg = "NULL byte detected" - with 
tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): self.read_csv(StringIO(data), names=cols) def test_utf8_bom(self): @@ -1537,7 +1535,7 @@ class InvalidBuffer(object): msg = "Invalid file path or buffer object type" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(InvalidBuffer()) # gh-16135: we want to ensure that "tell" and "seek" @@ -1560,7 +1558,7 @@ def seek(self, pos, whence=0): tm.assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(mock.Mock()) @tm.capture_stderr @@ -1595,11 +1593,3 @@ def test_skip_bad_lines(self): val = sys.stderr.getvalue() assert 'Skipping line 3' in val assert 'Skipping line 5' in val - - def test_buffer_rd_bytes_bad_unicode(self): - # Regression test for #22748 - t = BytesIO(b"\xB0") - if PY3: - t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape') - with pytest.raises(UnicodeError): - pd.read_csv(t, encoding='UTF-8') diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index 2d32e383c7fee..e5ada41c06762 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -52,19 +52,18 @@ def test_zip(self): for file_name in inner_file_names: tmp.writestr(file_name, data) - tm.assert_raises_regex(ValueError, 'Multiple files', - self.read_csv, path, compression='zip') + with pytest.raises(ValueError, match='Multiple files'): + self.read_csv(path, compression='zip') - tm.assert_raises_regex(ValueError, 'Multiple files', - self.read_csv, path, - compression='infer') + with pytest.raises(ValueError, match='Multiple files'): + self.read_csv(path, compression='infer') with tm.ensure_clean() as path: - with zipfile.ZipFile(path, mode='w') as tmp: + with zipfile.ZipFile(path, mode='w'): pass - tm.assert_raises_regex(ValueError, 'Zero files', - self.read_csv, path, compression='zip') 
+ with pytest.raises(ValueError, match='Zero files'): + self.read_csv(path, compression='zip') with tm.ensure_clean() as path: with open(path, 'wb') as f: @@ -133,5 +132,5 @@ def test_read_csv_compressed_utf16_example(self, datapath): def test_invalid_compression(self): msg = 'Unrecognized compression type: sfark' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv('test_file.zip', compression='sfark') diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index f17ad019469ab..f8a498172eaf9 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -24,7 +24,7 @@ def test_converters_type_must_be_dict(self): data = """index,A,B,C,D foo,2,3,4,5 """ - with tm.assert_raises_regex(TypeError, 'Type converters.+'): + with pytest.raises(TypeError, match='Type converters.+'): self.read_csv(StringIO(data), converters=0) def test_converters(self): diff --git a/pandas/tests/io/parser/dialect.py b/pandas/tests/io/parser/dialect.py index 480ce9ef361d0..aa89f3167788a 100644 --- a/pandas/tests/io/parser/dialect.py +++ b/pandas/tests/io/parser/dialect.py @@ -7,6 +7,8 @@ import csv +import pytest + from pandas.compat import StringIO from pandas.errors import ParserWarning @@ -61,7 +63,7 @@ class InvalidDialect(object): data = 'a\n1' msg = 'Invalid dialect' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), dialect=InvalidDialect) def test_dialect_conflict(self): diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 3a6db0fafa7c6..fe7a16e6447b3 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -21,7 +21,7 @@ class HeaderTests(object): def test_read_with_bad_header(self): errmsg = r"but only \d+ lines in file" - with tm.assert_raises_regex(ValueError, errmsg): + with pytest.raises(ValueError, match=errmsg): s = 
StringIO(',,') self.read_csv(s, header=[10]) @@ -322,9 +322,9 @@ def test_non_int_header(self): # GH 16338 msg = 'header must be integer or list of integers' data = """1,2\n3,4""" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), sep=',', header=['a', 'b']) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), sep=',', header='string_header') def test_singleton_header(self): diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 4c2c5b754f9bb..751fb01e32a6a 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -429,11 +429,10 @@ def test_read_with_parse_dates_scalar_non_bool(self): data = """A,B,C 1,2,2003-11-1""" - tm.assert_raises_regex(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates="C") - tm.assert_raises_regex(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates="C", - index_col="C") + with pytest.raises(TypeError, match=errmsg): + self.read_csv(StringIO(data), parse_dates="C") + with pytest.raises(TypeError, match=errmsg): + self.read_csv(StringIO(data), parse_dates="C", index_col="C") def test_read_with_parse_dates_invalid_type(self): errmsg = ("Only booleans, lists, and " @@ -442,13 +441,12 @@ def test_read_with_parse_dates_invalid_type(self): data = """A,B,C 1,2,2003-11-1""" - tm.assert_raises_regex(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates=(1,)) - tm.assert_raises_regex(TypeError, errmsg, - self.read_csv, StringIO(data), - parse_dates=np.array([4, 5])) - tm.assert_raises_regex(TypeError, errmsg, self.read_csv, - StringIO(data), parse_dates={1, 3, 3}) + with pytest.raises(TypeError, match=errmsg): + self.read_csv(StringIO(data), parse_dates=(1,)) + with pytest.raises(TypeError, match=errmsg): + self.read_csv(StringIO(data), parse_dates=np.array([4, 5])) + with pytest.raises(TypeError, 
match=errmsg): + self.read_csv(StringIO(data), parse_dates={1, 3, 3}) def test_parse_dates_empty_string(self): # see gh-2263 diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index c3c87bca24a47..590736f720e67 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -36,17 +36,17 @@ def test_invalid_skipfooter(self): # see gh-15925 (comment) msg = "skipfooter must be an integer" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(text), skipfooter="foo") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(text), skipfooter=1.5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(text), skipfooter=True) msg = "skipfooter cannot be negative" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(text), skipfooter=-1) def test_sniff_delimiter(self): @@ -220,13 +220,13 @@ def test_multi_char_sep_quotes(self): data = 'a,,b\n1,,a\n2,,"2,,b"' msg = 'ignored when a multi-char delimiter is used' - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): self.read_csv(StringIO(data), sep=',,') # We expect no match, so there should be an assertion # error out of the inner context manager. 
with pytest.raises(AssertionError): - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) @@ -255,11 +255,11 @@ def test_skipfooter_bad_row(self): for data in ('a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz'): - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): self.read_csv(StringIO(data), skipfooter=1) # We expect no match, so there should be an assertion # error out of the inner context manager. with pytest.raises(AssertionError): - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): self.read_csv(StringIO(data)) diff --git a/pandas/tests/io/parser/quoting.py b/pandas/tests/io/parser/quoting.py index 270a5430e6da4..a8a1cc5451f37 100644 --- a/pandas/tests/io/parser/quoting.py +++ b/pandas/tests/io/parser/quoting.py @@ -7,6 +7,8 @@ import csv +import pytest + from pandas.compat import PY3, StringIO, u from pandas.errors import ParserError @@ -22,29 +24,29 @@ def test_bad_quote_char(self): # Python 2.x: "...must be an 1-character..." # Python 3.x: "...must be a 1-character..." msg = '"quotechar" must be a(n)? 
1-character string' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quotechar='foo') + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quotechar='foo') msg = 'quotechar must be set if quoting enabled' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quotechar=None, - quoting=csv.QUOTE_MINIMAL) + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quotechar=None, + quoting=csv.QUOTE_MINIMAL) msg = '"quotechar" must be string, not int' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quotechar=2) + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quotechar=2) def test_bad_quoting(self): data = '1,2,3' msg = '"quoting" must be an integer' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quoting='foo') + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quoting='foo') # quoting must in the range [0, 3] msg = 'bad "quoting" value' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quoting=5) + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quoting=5) def test_quote_char_basic(self): data = 'a,b,c\n1,2,"cat"' @@ -70,13 +72,13 @@ def test_null_quote_char(self): # sanity checks msg = 'quotechar must be set if quoting enabled' - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quotechar=None, - quoting=csv.QUOTE_MINIMAL) + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quotechar=None, + quoting=csv.QUOTE_MINIMAL) - tm.assert_raises_regex(TypeError, msg, self.read_csv, - StringIO(data), quotechar='', - quoting=csv.QUOTE_MINIMAL) + with pytest.raises(TypeError, match=msg): + self.read_csv(StringIO(data), quotechar='', + quoting=csv.QUOTE_MINIMAL) # no errors should be raised if quoting is None expected = DataFrame([[1, 2, 3]], @@ -163,7 +165,7 @@ def test_unbalanced_quoting(self): 
else: regex = "unexpected end of data" - with tm.assert_raises_regex(ParserError, regex): + with pytest.raises(ParserError, match=regex): self.read_csv(StringIO(data)) expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) diff --git a/pandas/tests/io/parser/skiprows.py b/pandas/tests/io/parser/skiprows.py index 5d1b3b207a240..a051ee9b22d10 100644 --- a/pandas/tests/io/parser/skiprows.py +++ b/pandas/tests/io/parser/skiprows.py @@ -8,6 +8,7 @@ from datetime import datetime import numpy as np +import pytest from pandas.compat import StringIO, lrange, range from pandas.errors import EmptyDataError @@ -215,11 +216,11 @@ def test_skiprows_callable(self): skiprows = lambda x: True msg = "No columns to parse from file" - with tm.assert_raises_regex(EmptyDataError, msg): + with pytest.raises(EmptyDataError, match=msg): self.read_csv(StringIO(data), skiprows=skiprows) # This is a bad callable and should raise. msg = "by zero" skiprows = lambda x: 1 / 0 - with tm.assert_raises_regex(ZeroDivisionError, msg): + with pytest.raises(ZeroDivisionError, match=msg): self.read_csv(StringIO(data), skiprows=skiprows) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index f7846f7824ba5..bb64a85590c8b 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -69,11 +69,10 @@ def test_fwf(self): StringIO(data3), colspecs=colspecs, delimiter='~', header=None) tm.assert_frame_equal(df, expected) - with tm.assert_raises_regex(ValueError, - "must specify only one of"): + with pytest.raises(ValueError, match="must specify only one of"): read_fwf(StringIO(data3), colspecs=colspecs, widths=[6, 10, 10, 7]) - with tm.assert_raises_regex(ValueError, "Must specify either"): + with pytest.raises(ValueError, match="Must specify either"): read_fwf(StringIO(data3), colspecs=None, widths=None) def test_BytesIO_input(self): @@ -96,9 +95,8 @@ def test_fwf_colspecs_is_list_or_tuple(self): bar2,12,13,14,15 """ - 
with tm.assert_raises_regex(TypeError, - 'column specifications must ' - 'be a list or tuple.+'): + msg = 'column specifications must be a list or tuple.+' + with pytest.raises(TypeError, match=msg): pd.io.parsers.FixedWidthReader(StringIO(data), {'a': 1}, ',', '#') @@ -112,9 +110,8 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self): bar2,12,13,14,15 """ - with tm.assert_raises_regex(TypeError, - 'Each column specification ' - 'must be.+'): + msg = 'Each column specification must be.+' + with pytest.raises(TypeError, match=msg): read_fwf(StringIO(data), [('a', 1)]) def test_fwf_colspecs_None(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 4437b0db9054e..8c6dbd64c785d 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -33,7 +33,7 @@ def test_mangle_dupe_cols_false(self): msg = 'is not supported' for engine in ('c', 'python'): - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) @@ -43,14 +43,14 @@ def test_c_engine(self): msg = 'does not support' # specify C engine with unsupported options (raise) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine='c', sep=None, delim_whitespace=False) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine='c', sep=r'\s') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine='c', skipfooter=1) # specify C-unsupported options without python-unsupported options @@ -70,9 +70,9 @@ def test_c_engine(self): x q 30 3 -0.6662 
-0.5243 -0.3580 0.89145 2.5838""" msg = 'Error tokenizing data' - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): read_csv(StringIO(text), sep='\\s+') - with tm.assert_raises_regex(ParserError, msg): + with pytest.raises(ParserError, match=msg): read_csv(StringIO(text), engine='c', sep='\\s+') msg = "Only length-1 thousands markers supported" @@ -80,14 +80,14 @@ def test_c_engine(self): 1|2,334|5 10|13|10. """ - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), thousands=',,') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), thousands='') msg = "Only length-1 line terminators supported" data = 'a,b,c~~1,2,3~~4,5,6' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), lineterminator='~~') def test_python_engine(self, python_engine): @@ -104,7 +104,7 @@ def test_python_engine(self, python_engine): 'with the %r engine' % (default, python_engine)) kwargs = {default: object()} - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine=python_engine, **kwargs) def test_python_engine_file_no_next(self, python_engine): @@ -122,7 +122,7 @@ def read(self): data = "a\n1" msg = "The 'python' engine cannot iterate" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_csv(NoNextBuffer(data), engine=python_engine) diff --git a/pandas/tests/io/parser/usecols.py b/pandas/tests/io/parser/usecols.py index 82d45b163d16a..e9bb72be124d3 100644 --- a/pandas/tests/io/parser/usecols.py +++ b/pandas/tests/io/parser/usecols.py @@ -31,7 +31,7 @@ def test_raise_on_mixed_dtype_usecols(self): usecols = [0, 'b', 2] - with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg): + with pytest.raises(ValueError, 
match=self.msg_validate_usecols_arg): self.read_csv(StringIO(data), usecols=usecols) def test_usecols(self): @@ -97,7 +97,7 @@ def test_usecols_single_string(self): usecols = 'foo' - with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg): + with pytest.raises(ValueError, match=self.msg_validate_usecols_arg): self.read_csv(StringIO(data), usecols=usecols) def test_usecols_index_col_False(self): @@ -363,10 +363,10 @@ def test_usecols_with_mixed_encoding_strings(self): 3.568935038,7,False,a ''' - with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg): + with pytest.raises(ValueError, match=self.msg_validate_usecols_arg): self.read_csv(StringIO(s), usecols=[u'AAA', b'BBB']) - with tm.assert_raises_regex(ValueError, self.msg_validate_usecols_arg): + with pytest.raises(ValueError, match=self.msg_validate_usecols_arg): self.read_csv(StringIO(s), usecols=[b'AAA', u'BBB']) def test_usecols_with_multibyte_characters(self): @@ -499,21 +499,21 @@ def test_raise_on_usecols_names_mismatch(self): tm.assert_frame_equal(df, expected) usecols = ['a', 'b', 'c', 'f'] - with tm.assert_raises_regex(ValueError, - self.msg_validate_usecols_names.format( - r"\['f'\]")): + msg = self.msg_validate_usecols_names.format(r"\['f'\]") + + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), usecols=usecols) usecols = ['a', 'b', 'f'] - with tm.assert_raises_regex(ValueError, - self.msg_validate_usecols_names.format( - r"\['f'\]")): + msg = self.msg_validate_usecols_names.format(r"\['f'\]") + + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), usecols=usecols) usecols = ['a', 'b', 'f', 'g'] - with tm.assert_raises_regex(ValueError, - self.msg_validate_usecols_names.format( - r"\[('f', 'g'|'g', 'f')\]")): + msg = self.msg_validate_usecols_names.format( + r"\[('f', 'g'|'g', 'f')\]") + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), usecols=usecols) names = ['A', 'B', 'C', 'D'] @@ -537,13 +537,14 @@ def 
test_raise_on_usecols_names_mismatch(self): # tm.assert_frame_equal(df, expected) usecols = ['A', 'B', 'C', 'f'] - with tm.assert_raises_regex(ValueError, - self.msg_validate_usecols_names.format( - r"\['f'\]")): + msg = self.msg_validate_usecols_names.format(r"\['f'\]") + + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), header=0, names=names, usecols=usecols) + usecols = ['A', 'B', 'f'] - with tm.assert_raises_regex(ValueError, - self.msg_validate_usecols_names.format( - r"\['f'\]")): + msg = self.msg_validate_usecols_names.format(r"\['f'\]") + + with pytest.raises(ValueError, match=msg): self.read_csv(StringIO(data), names=names, usecols=usecols) diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py index b85f6b6bbd5ce..016dc56b4d800 100644 --- a/pandas/tests/io/sas/test_sas.py +++ b/pandas/tests/io/sas/test_sas.py @@ -1,8 +1,8 @@ +import pytest + from pandas.compat import StringIO from pandas import read_sas -import pandas.util.testing as tm - class TestSas(object): @@ -12,5 +12,5 @@ def test_sas_buffer_format(self): msg = ("If this is a buffer object rather than a string " "name, you must specify a format string") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_sas(b) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 73e29e6eb9a6a..2f2b792588a92 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -269,14 +269,15 @@ def test_constructor_bad_file(self, mmap_file): msg = "[Errno 22]" err = mmap.error - tm.assert_raises_regex(err, msg, icom.MMapWrapper, non_file) + with pytest.raises(err, match=msg): + icom.MMapWrapper(non_file) target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" - tm.assert_raises_regex( - ValueError, msg, icom.MMapWrapper, target) + with pytest.raises(ValueError, match=msg): + icom.MMapWrapper(target) def test_get_attr(self, mmap_file): with 
open(mmap_file, 'r') as target: @@ -307,5 +308,5 @@ def test_unknown_engine(self): with tm.ensure_clean() as path: df = tm.makeDataFrame() df.to_csv(path) - with tm.assert_raises_regex(ValueError, 'Unknown engine'): + with pytest.raises(ValueError, match='Unknown engine'): pd.read_csv(path, engine='pyt') diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 1bd2fb5887e38..4bff39f8c7efc 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -564,12 +564,12 @@ def test_sheet_name_and_sheetname(self, ext): tm.assert_frame_equal(df2_parse, dfref, check_names=False) def test_sheet_name_both_raises(self, ext): - with tm.assert_raises_regex(TypeError, "Cannot specify both"): + with pytest.raises(TypeError, match="Cannot specify both"): self.get_exceldf('test1', ext, sheetname='Sheet1', sheet_name='Sheet1') excel = self.get_excelfile('test1', ext) - with tm.assert_raises_regex(TypeError, "Cannot specify both"): + with pytest.raises(TypeError, match="Cannot specify both"): excel.parse(sheetname='Sheet1', sheet_name='Sheet1') @@ -1040,7 +1040,7 @@ def test_read_excel_nrows_greater_than_nrows_in_file(self, ext): def test_read_excel_nrows_non_integer_parameter(self, ext): # GH 16645 msg = "'nrows' must be an integer >=0" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), nrows='5') @@ -2133,7 +2133,7 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlwt!" with ensure_clean(ext) as f: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine=engine, mode='a') @@ -2191,7 +2191,7 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlsxwriter!" 
with ensure_clean(ext) as f: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine=engine, mode='a') @@ -2215,7 +2215,7 @@ def test_ExcelWriter_dispatch(self, klass, ext): assert isinstance(writer, klass) def test_ExcelWriter_dispatch_raises(self): - with tm.assert_raises_regex(ValueError, 'No engine'): + with pytest.raises(ValueError, match='No engine'): ExcelWriter('nothing') @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index fea3c23121ab2..4201f751959b5 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -56,7 +56,7 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') - with tm.assert_raises_regex(ValueError, "minimum version"): + with pytest.raises(ValueError, match="minimum version"): read_html(datapath("io", "data", "spam.html"), flavor='bs4') @@ -65,7 +65,7 @@ def test_invalid_flavor(): flavor = "invalid flavor" msg = r"\{" + flavor + r"\} is not a valid set of flavors" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): read_html(url, "google", flavor=flavor) @@ -204,8 +204,8 @@ def test_skiprows_ndarray(self): assert_framelist_equal(df1, df2) def test_skiprows_invalid(self): - with tm.assert_raises_regex(TypeError, 'is not a valid type ' - 'for skipping rows'): + with pytest.raises(TypeError, match=('is not a valid type ' + 'for skipping rows')): self.read_html(self.spam_data, '.*Water.*', skiprows='asdf') def test_index(self): @@ -288,7 +288,7 @@ def test_file_url(self): @pytest.mark.slow def test_invalid_table_attrs(self): url = self.banklist_data - with tm.assert_raises_regex(ValueError, 'No tables found'): + with pytest.raises(ValueError, match='No tables found'): self.read_html(url, 'First Federal Bank of Florida', 
attrs={'id': 'tasdfable'}) @@ -341,8 +341,8 @@ def test_regex_idempotency(self): assert isinstance(df, DataFrame) def test_negative_skiprows(self): - with tm.assert_raises_regex(ValueError, - r'\(you passed a negative value\)'): + msg = r'\(you passed a negative value\)' + with pytest.raises(ValueError, match=msg): self.read_html(self.spam_data, 'Water', skiprows=-1) @network @@ -822,10 +822,9 @@ def test_parse_dates_combine(self): def test_computer_sales_page(self, datapath): data = datapath('io', 'data', 'computer_sales_page.html') - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - r"multi_index of columns"): + msg = (r"Passed header=\[0,1\] are too many " + r"rows for this multi_index of columns") + with pytest.raises(ParserError, match=msg): self.read_html(data, header=[0, 1]) data = datapath('io', 'data', 'computer_sales_page.html') @@ -839,10 +838,9 @@ def test_wikipedia_states_table(self, datapath): assert result['sq mi'].dtype == np.dtype('float64') def test_parser_error_on_empty_header_row(self): - with tm.assert_raises_regex(ParserError, - r"Passed header=\[0,1\] are " - r"too many rows for this " - r"multi_index of columns"): + msg = (r"Passed header=\[0,1\] are too many " + r"rows for this multi_index of columns") + with pytest.raises(ParserError, match=msg): self.read_html(""" diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index a47c3c01fc80e..85d467650d5c4 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -370,8 +370,7 @@ def test_write_explicit(self, compression, get_random_path): @pytest.mark.parametrize('compression', ['', 'None', 'bad', '7z']) def test_write_explicit_bad(self, compression, get_random_path): - with tm.assert_raises_regex(ValueError, - "Unrecognized compression type"): + with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() 
df.to_pickle(path, compression=compression) @@ -474,7 +473,7 @@ def test_read_bad_versions(self, protocol, get_random_path): # For Python 2, HIGHEST_PROTOCOL should be 2. msg = ("pickle protocol {protocol} asked for; the highest available " "protocol is 2").format(protocol=protocol) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() df.to_pickle(path, protocol=protocol) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b6cf660cf171e..7e1b43e81f9c1 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1084,9 +1084,7 @@ def test_encoding(self): def test_latin_encoding(self): if compat.PY2: - tm.assert_raises_regex( - TypeError, r'\[unicode\] is not implemented as a table column') - return + pytest.skip("[unicode] is not implemented as a table column") values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'], [b'E\xc9, 17', b'a', b'b', b'c'], @@ -2598,8 +2596,8 @@ def test_terms(self): for t in terms: store.select('wp', t) - with tm.assert_raises_regex( - TypeError, 'Only named functions are supported'): + with pytest.raises(TypeError, + match='Only named functions are supported'): store.select( 'wp', 'major_axis == (lambda x: x)("20130101")') @@ -2610,9 +2608,8 @@ def test_terms(self): expected = Panel({-1: wpneg[-1]}) tm.assert_panel_equal(res, expected) - with tm.assert_raises_regex(NotImplementedError, - 'Unary addition ' - 'not supported'): + msg = 'Unary addition not supported' + with pytest.raises(NotImplementedError, match=msg): store.select('wpneg', 'items == +1') def test_term_compat(self): @@ -4520,9 +4517,8 @@ def f(): pytest.raises(ClosedFileError, store.get_storer, 'df2') pytest.raises(ClosedFileError, store.remove, 'df2') - def f(): + with pytest.raises(ClosedFileError, match='file is not open'): store.select('df') - tm.assert_raises_regex(ClosedFileError, 'file is not open', f) 
def test_pytables_native_read(self, datapath): with ensure_clean_store( @@ -4971,9 +4967,8 @@ def test_to_hdf_with_object_column_names(self): df = DataFrame(np.random.randn(10, 2), columns=index(2)) with ensure_clean_path(self.path) as path: with catch_warnings(record=True): - with tm.assert_raises_regex( - ValueError, ("cannot have non-object label " - "DataIndexableCol")): + msg = "cannot have non-object label DataIndexableCol" + with pytest.raises(ValueError, match=msg): df.to_hdf(path, 'df', format='table', data_columns=True) @@ -5155,14 +5150,14 @@ def test_query_compare_column_type(self): pd.Timedelta(1, 's')]: query = 'date {op} v'.format(op=op) with pytest.raises(TypeError): - result = store.select('test', where=query) + store.select('test', where=query) # strings to other columns must be convertible to type v = 'a' for col in ['int', 'float', 'real_date']: query = '{col} {op} v'.format(op=op, col=col) with pytest.raises(ValueError): - result = store.select('test', where=query) + store.select('test', where=query) for v, col in zip(['1', '1.1', '2014-01-01'], ['int', 'float', 'real_date']): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 777b04bbae97d..6bb7800b72110 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -993,7 +993,7 @@ def test_database_uri_string(self): pass db_uri = "postgresql+pg8000://user:pass@host/dbname" - with tm.assert_raises_regex(ImportError, "pg8000"): + with pytest.raises(ImportError, match="pg8000"): sql.read_sql("select * from table", db_uri) def _make_iris_table_metadata(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 54d17a4773749..1f0a0d6bfee95 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -22,7 +22,7 @@ def test_import_error_message(): # GH-19810 df = DataFrame({"A": [1, 2]}) - with tm.assert_raises_regex(ImportError, 'matplotlib is required'): + with 
pytest.raises(ImportError, match='matplotlib is required'): df.plot() diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index e965ff7a78a39..5387a1043e00e 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -237,9 +237,9 @@ def test_join_on_fails_with_wrong_object_type(self, wrong_type): # Edited test to remove the Series object from test parameters df = DataFrame({'a': [1, 1]}) - with tm.assert_raises_regex(TypeError, str(type(wrong_type))): + with pytest.raises(TypeError, match=str(type(wrong_type))): merge(wrong_type, df, left_on='a', right_on='a') - with tm.assert_raises_regex(TypeError, str(type(wrong_type))): + with pytest.raises(TypeError, match=str(type(wrong_type))): merge(df, wrong_type, left_on='a', right_on='a') def test_join_on_pass_vector(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2b4a7952ae738..d9297cdc5ab3e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -275,7 +275,7 @@ def test_no_overlap_more_informative_error(self): 'left_index={lidx}, right_index={ridx}' .format(lon=None, ron=None, lidx=False, ridx=False)) - with tm.assert_raises_regex(MergeError, msg): + with pytest.raises(MergeError, match=msg): merge(df1, df2) def test_merge_non_unique_indexes(self): @@ -1472,7 +1472,7 @@ def test_different(self, right_vals): "If you wish to proceed you should use " "pd.concat".format(lk_dtype=left['A'].dtype, rk_dtype=right['A'].dtype)) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.merge(left, right, on='A') @pytest.mark.parametrize('d1', [np.int64, np.int32, @@ -1599,7 +1599,7 @@ def test_merge_incompat_dtypes(self, df1_vals, df2_vals): "you should use pd.concat".format(lk_dtype=df1['A'].dtype, rk_dtype=df2['A'].dtype)) msg = re.escape(msg) - with tm.assert_raises_regex(ValueError, msg): + 
with pytest.raises(ValueError, match=msg): pd.merge(df1, df2, on=['A']) # Check that error still raised when swapping order of dataframes @@ -1608,7 +1608,7 @@ def test_merge_incompat_dtypes(self, df1_vals, df2_vals): "you should use pd.concat".format(lk_dtype=df2['A'].dtype, rk_dtype=df1['A'].dtype)) msg = re.escape(msg) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.merge(df2, df1, on=['A']) @@ -1938,6 +1938,6 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nms, nm): left_index=left_index, right_index=right_index) tm.assert_frame_equal(result, expected) else: - with tm.assert_raises_regex(ValueError, 'a Series without a name'): + with pytest.raises(ValueError, match='a Series without a name'): result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cf39293f47082..71db7844a9db5 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -6,7 +6,6 @@ from pandas import (merge_asof, read_csv, to_datetime, Timedelta) from pandas.core.reshape.merge import MergeError -from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal @@ -1005,7 +1004,7 @@ def test_merge_datatype_error(self): right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7], 'a': [1, 2, 3, 6, 7]}) - with tm.assert_raises_regex(MergeError, msg): + with pytest.raises(MergeError, match=msg): merge_asof(left, right, on='a') @pytest.mark.parametrize('func', [lambda x: x, lambda x: to_datetime(x)], @@ -1019,7 +1018,7 @@ def test_merge_on_nans(self, func, side): df_null = pd.DataFrame({'a': nulls, 'left_val': ['a', 'b', 'c']}) df = pd.DataFrame({'a': non_nulls, 'right_val': [1, 6, 11]}) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): if side == 
'left': merge_asof(df_null, df, on='a') else: diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 42d8eb7273ee1..0f8ecc6370bfd 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -1,6 +1,6 @@ +import pytest import pandas as pd from pandas import DataFrame, merge_ordered -from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal from numpy import nan @@ -76,7 +76,8 @@ def test_empty_sequence_concat(self): ([None, None], none_pat) ] for df_seq, pattern in test_cases: - tm.assert_raises_regex(ValueError, pattern, pd.concat, df_seq) + with pytest.raises(ValueError, match=pattern): + pd.concat(df_seq) pd.concat([pd.DataFrame()]) pd.concat([None, pd.DataFrame()]) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 673658c29fe75..c7fba47a8f27c 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -147,12 +147,10 @@ def test_concatlike_same_dtypes(self): tm.assert_index_equal(res, exp) # cannot append non-index - with tm.assert_raises_regex(TypeError, - 'all inputs must be Index'): + with pytest.raises(TypeError, match='all inputs must be Index'): pd.Index(vals1).append(vals2) - with tm.assert_raises_regex(TypeError, - 'all inputs must be Index'): + with pytest.raises(TypeError, match='all inputs must be Index'): pd.Index(vals1).append([pd.Index(vals2), vals3]) # ----- Series ----- # @@ -202,16 +200,16 @@ def test_concatlike_same_dtypes(self): msg = (r'cannot concatenate object of type \"(.+?)\";' ' only pd.Series, pd.DataFrame, and pd.Panel' r' \(deprecated\) objs are valid') - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(vals1).append(vals2) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.Series(vals1).append([pd.Series(vals2), 
vals3]) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.concat([pd.Series(vals1), vals2]) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): pd.concat([pd.Series(vals1), pd.Series(vals2), vals3]) def test_concatlike_dtypes_coercion(self): diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index e83a2cb483de7..16ecb07c5f413 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -112,7 +112,7 @@ def test_tuple_vars_fail_with_multiindex(self): for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), (tuple_a, tuple_b)): - with tm.assert_raises_regex(ValueError, r'MultiIndex'): + with pytest.raises(ValueError, match=r'MultiIndex'): self.df1.melt(id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1cb036dccf23c..69572f75fea1b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1674,22 +1674,22 @@ def test_crosstab_errors(self): 'c': [1, 1, np.nan, 1, 1]}) error = 'values cannot be used without an aggfunc.' 
- with tm.assert_raises_regex(ValueError, error): + with pytest.raises(ValueError, match=error): pd.crosstab(df.a, df.b, values=df.c) error = 'aggfunc cannot be used without values' - with tm.assert_raises_regex(ValueError, error): + with pytest.raises(ValueError, match=error): pd.crosstab(df.a, df.b, aggfunc=np.mean) error = 'Not a valid normalize argument' - with tm.assert_raises_regex(ValueError, error): + with pytest.raises(ValueError, match=error): pd.crosstab(df.a, df.b, normalize='42') - with tm.assert_raises_regex(ValueError, error): + with pytest.raises(ValueError, match=error): pd.crosstab(df.a, df.b, normalize=42) error = 'Not a valid margins argument' - with tm.assert_raises_regex(ValueError, error): + with pytest.raises(ValueError, match=error): pd.crosstab(df.a, df.b, normalize='all', margins=42) def test_crosstab_with_categorial_columns(self): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 44de3e93d42bf..f04e9a55a6c8d 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -205,8 +205,8 @@ def test_qcut_specify_quantiles(self): tm.assert_categorical_equal(factor, expected) def test_qcut_all_bins_same(self): - tm.assert_raises_regex(ValueError, "edges.*unique", qcut, - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) + with pytest.raises(ValueError, match="edges.*unique"): + qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) def test_cut_out_of_bounds(self): arr = np.random.randn(100) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 8743d11118200..80538b0c6de4e 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -58,11 +58,11 @@ def test_union_categorical(self): s = Categorical([0, 1.2, 2]) s2 = Categorical([2, 3, 4]) msg = 'dtype of categories must be the same' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): 
union_categoricals([s, s2]) msg = 'No Categoricals to union' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): union_categoricals([]) def test_union_categoricals_nan(self): @@ -143,7 +143,7 @@ def test_union_categoricals_ordered(self): c2 = Categorical([1, 2, 3], ordered=False) msg = 'Categorical.ordered must be the same' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2]) res = union_categoricals([c1, c1]) @@ -161,7 +161,7 @@ def test_union_categoricals_ordered(self): c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) msg = "to union ordered Categoricals, all categories must be the same" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2]) def test_union_categoricals_ignore_order(self): @@ -174,7 +174,7 @@ def test_union_categoricals_ignore_order(self): tm.assert_categorical_equal(res, exp) msg = 'Categorical.ordered must be the same' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2], ignore_order=False) res = union_categoricals([c1, c1], ignore_order=True) @@ -212,10 +212,10 @@ def test_union_categoricals_ignore_order(self): tm.assert_categorical_equal(result, expected) msg = "to union ordered Categoricals, all categories must be the same" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2], ignore_order=False) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): union_categoricals([c1, c2]) def test_union_categoricals_sort(self): diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index e4a9591b95c26..e7e1626bdb2da 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -1,4 +1,4 @@ - +import pytest import numpy as np from pandas import 
date_range, Index import pandas.util.testing as tm @@ -41,9 +41,12 @@ def test_empty(self): expected = [] assert result == expected - def test_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] + @pytest.mark.parametrize("X", [ + 1, [1], [1, 2], [[1], 2], + 'a', ['a'], ['a', 'b'], [['a'], 'b'] + ]) + def test_invalid_input(self, X): msg = "Input must be a list-like of list-likes" - for X in invalid_inputs: - tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X) + + with pytest.raises(TypeError, match=msg): + cartesian_product(X=X) diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py index 8d17989ebc7b1..7951fb7ddda0d 100644 --- a/pandas/tests/scalar/interval/test_interval.py +++ b/pandas/tests/scalar/interval/test_interval.py @@ -5,7 +5,6 @@ import pandas.core.common as com import pytest -import pandas.util.testing as tm @pytest.fixture @@ -35,7 +34,7 @@ def test_contains(self, interval): assert 0 not in interval msg = "__contains__ not defined for two intervals" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval in interval interval_both = Interval(0, 1, closed='both') @@ -53,7 +52,7 @@ def test_equal(self): assert Interval(0, 1) != 0 def test_comparison(self): - with tm.assert_raises_regex(TypeError, 'unorderable types'): + with pytest.raises(TypeError, match='unorderable types'): Interval(0, 1) < 2 assert Interval(0, 1) < Interval(1, 2) @@ -106,7 +105,7 @@ def test_length_errors(self, left, right): # GH 18789 iv = Interval(left, right) msg = 'cannot compute length between .* and .*' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): iv.length def test_math_add(self, closed): @@ -124,10 +123,10 @@ def test_math_add(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for \+" - with tm.assert_raises_regex(TypeError, msg): + with 
pytest.raises(TypeError, match=msg): interval + interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval + 'foo' def test_math_sub(self, closed): @@ -142,10 +141,10 @@ def test_math_sub(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for -" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval - interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval - 'foo' def test_math_mult(self, closed): @@ -163,11 +162,11 @@ def test_math_mult(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for \*" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval * interval msg = r"can\'t multiply sequence by non-int" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval * 'foo' def test_math_div(self, closed): @@ -182,10 +181,10 @@ def test_math_div(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for /" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval / interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval / 'foo' def test_math_floordiv(self, closed): @@ -200,19 +199,19 @@ def test_math_floordiv(self, closed): assert result == expected msg = r"unsupported operand type\(s\) for //" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval // interval - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval // 'foo' def test_constructor_errors(self): msg = "invalid option for 'closed': foo" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Interval(0, 1, closed='foo') msg = 'left side of interval must be <= right side' - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Interval(1, 0) @pytest.mark.parametrize('tz_left, tz_right', [ diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py index 7eca24aa8af25..869ff205c2f51 100644 --- a/pandas/tests/scalar/interval/test_ops.py +++ b/pandas/tests/scalar/interval/test_ops.py @@ -2,7 +2,6 @@ import pytest from pandas import Interval, Timedelta, Timestamp -import pandas.util.testing as tm @pytest.fixture(params=[ @@ -57,5 +56,5 @@ def test_overlaps_invalid_type(self, other): interval = Interval(0, 1) msg = '`other` must be an Interval, got {other}'.format( other=type(other).__name__) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): interval.overlaps(other) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 432d55ef5967a..23762fda8c22a 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -329,26 +329,26 @@ def test_conv_weekly(self): assert ival_W.asfreq('W') == ival_W msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): ival_W.asfreq('WK') def test_conv_weekly_legacy(self): # frequency conversion tests: from Weekly Frequency msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=1) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-SAT', year=2007, month=1, day=6) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-FRI', year=2007, month=1, day=5) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-THU', year=2007, 
month=1, day=4) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-WED', year=2007, month=1, day=3) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-TUE', year=2007, month=1, day=2) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK-MON', year=2007, month=1, day=1) def test_conv_business(self): @@ -742,10 +742,10 @@ def test_asfreq_MS(self): assert initial.asfreq(freq="M", how="S") == Period('2013-01', 'M') msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): initial.asfreq(freq="MS", how="S") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.Period('2013-01', 'MS') assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 66e8541d2c911..6d5686463f2ae 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -142,11 +142,11 @@ def test_period_cons_mult(self): msg = ('Frequency must be positive, because it' ' represents span: -3M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='-3M') msg = ('Frequency must be positive, because it' ' represents span: 0M') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='0M') def test_period_cons_combined(self): @@ -196,28 +196,28 @@ def test_period_cons_combined(self): msg = ('Frequency must be positive, because it' ' represents span: -25H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='-1D1H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, 
match=msg): Period('2011-01', freq='-1H1D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='-1D1H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='-1H1D') msg = ('Frequency must be positive, because it' ' represents span: 0D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='0D0H') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq='0D0H') # You can only combine together day and intraday offsets msg = ('Invalid frequency: 1W1D') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='1W1D') msg = ('Invalid frequency: 1D1W') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2011-01', freq='1D1W') @pytest.mark.parametrize('tzstr', ['Europe/Brussels', @@ -528,9 +528,9 @@ def test_period_deprecated_freq(self): msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG for exp, freqs in iteritems(cases): for freq in freqs: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period('2016-03-01 09:00', freq=freq) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq=freq) # check supported freq-aliases still works @@ -774,7 +774,7 @@ def test_properties_weekly_legacy(self): assert exp.days_in_month == 29 msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=7) def test_properties_daily(self): @@ -1036,14 +1036,14 @@ def test_add_raises(self): dt1 = Period(freq='D', year=2008, month=1, day=1) dt2 = Period(freq='D', year=2008, month=1, day=2) msg = r"unsupported 
operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dt1 + "str" msg = r"unsupported operand type\(s\)" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): "str" + dt1 - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): dt1 + dt2 boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] @@ -1061,13 +1061,13 @@ def test_add_timestamp_raises(self, rbox, lbox): msg = (r"cannot add|unsupported operand|" r"can only operate on a|incompatible type|" r"ufunc add cannot use operands") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(ts) + rbox(per) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(per) + rbox(ts) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): lbox(per) + rbox(per) def test_sub(self): @@ -1079,7 +1079,7 @@ def test_sub(self): assert dt2 - dt1 == 14 * off msg = r"Input has different freq=M from Period\(freq=D\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): dt1 - Period('2011-02', freq='M') def test_add_offset(self): @@ -1435,10 +1435,10 @@ def test_period_ops_offset(self): assert result == exp msg = r"Input cannot be converted to Period\(freq=D\)" - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): p + offsets.Hour(2) - with tm.assert_raises_regex(period.IncompatibleFrequency, msg): + with pytest.raises(period.IncompatibleFrequency, match=msg): p - offsets.Hour(2) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index d648140aa7347..4165b1aec705f 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ 
b/pandas/tests/scalar/timedelta/test_construction.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd -import pandas.util.testing as tm from pandas import Timedelta @@ -90,15 +89,16 @@ def test_construction(): Timedelta('3.1415') # invalid construction - tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", - lambda: Timedelta()) - tm.assert_raises_regex(ValueError, - "unit abbreviation w/o a number", - lambda: Timedelta('foo')) - tm.assert_raises_regex(ValueError, - "cannot construct a Timedelta from the " - "passed arguments, allowed keywords are ", - lambda: Timedelta(day=10)) + with pytest.raises(ValueError, match="cannot construct a Timedelta"): + Timedelta() + + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + Timedelta('foo') + + msg = ("cannot construct a Timedelta from " + "the passed arguments, allowed keywords are ") + with pytest.raises(ValueError, match=msg): + Timedelta(day=10) # floats expected = np.timedelta64( @@ -190,8 +190,8 @@ def test_iso_constructor(fmt, exp): 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', 'P1DT0H0M0.S']) def test_iso_constructor_raises(fmt): - with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' - 'format - {}'.format(fmt)): + with pytest.raises(ValueError, match=('Invalid ISO 8601 Duration ' + 'format - {}'.format(fmt))): Timedelta(fmt) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 7af0b281aeaa5..47f91fdf25756 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -334,20 +334,20 @@ def test_constructor_with_stringoffset(self): assert result == eval(repr(result)) def test_constructor_invalid(self): - with tm.assert_raises_regex(TypeError, 'Cannot convert input'): + with pytest.raises(TypeError, match='Cannot convert input'): Timestamp(slice(2)) - with tm.assert_raises_regex(ValueError, 'Cannot convert Period'): + with 
pytest.raises(ValueError, match='Cannot convert Period'): Timestamp(Period('1000-01-01')) def test_constructor_invalid_tz(self): # GH#17690 - with tm.assert_raises_regex(TypeError, 'must be a datetime.tzinfo'): + with pytest.raises(TypeError, match='must be a datetime.tzinfo'): Timestamp('2017-10-22', tzinfo='US/Eastern') - with tm.assert_raises_regex(ValueError, 'at most one of'): + with pytest.raises(ValueError, match='at most one of'): Timestamp('2017-10-22', tzinfo=utc, tz='UTC') - with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + with pytest.raises(ValueError, match="Invalid frequency:"): # GH#5168 # case where user tries to pass tz as an arg, not kwarg, gets # interpreted as a `freq` @@ -577,7 +577,7 @@ def test_construct_timestamp_preserve_original_frequency(self): def test_constructor_invalid_frequency(self): # GH 22311 - with tm.assert_raises_regex(ValueError, "Invalid frequency:"): + with pytest.raises(ValueError, match="Invalid frequency:"): Timestamp('2012-01-01', freq=[]) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 827ad3581cd49..6755d0bd4ae27 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -64,14 +64,14 @@ def test_tz_localize_ambiguous(self): ts.tz_localize('US/Eastern', ambiguous='infer') # GH#8025 - with tm.assert_raises_regex(TypeError, - 'Cannot localize tz-aware Timestamp, ' - 'use tz_convert for conversions'): + msg = ('Cannot localize tz-aware Timestamp, ' + 'use tz_convert for conversions') + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01', tz='US/Eastern').tz_localize('Asia/Tokyo') - with tm.assert_raises_regex(TypeError, - 'Cannot convert tz-naive Timestamp, ' - 'use tz_localize to localize'): + msg = ('Cannot convert tz-naive Timestamp, ' + 'use tz_localize to localize') + with pytest.raises(TypeError, match=msg): Timestamp('2011-01-01').tz_convert('Asia/Tokyo') 
@pytest.mark.parametrize('stamp, tz', [ diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 0c477a021df4d..a9a60c4119605 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -76,7 +76,7 @@ def test_round_nonstandard_freq(self): def test_round_invalid_arg(self): stamp = Timestamp('2000-01-05 05:09:15.13') - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): stamp.round('foo') @pytest.mark.parametrize('test_input, rounder, freq, expected', [ diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 57a087221f411..e2cffe653d935 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -520,7 +520,7 @@ def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): - with tm.assert_raises_regex(error_type, error_desc): + with pytest.raises(error_type, match=error_desc): Series(data, index=index).drop(drop_labels, axis=axis) @@ -557,5 +557,5 @@ def test_drop_empty_list(index, drop_labels): ]) def test_drop_non_empty_list(data, index, drop_labels): # GH 21494 and GH 16877 - with tm.assert_raises_regex(KeyError, 'not found in axis'): + with pytest.raises(KeyError, match='not found in axis'): pd.Series(data=data, index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 5d1b81ba7dc1c..b94104a89627a 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -322,11 +322,11 @@ def test_where_invalid_input(cond): s = Series([1, 2, 3]) msg = "Boolean array expected for the condition" - with tm.assert_raises_regex(ValueError, 
msg): + with pytest.raises(ValueError, match=msg): s.where(cond) msg = "Array conditional must be same shape as self" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where([True]) @@ -335,7 +335,7 @@ def test_where_ndframe_align(): s = Series([1, 2, 3]) cond = [True] - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where(cond) expected = Series([1, np.nan, np.nan]) @@ -344,7 +344,7 @@ def test_where_ndframe_align(): tm.assert_series_equal(out, expected) cond = np.array([False, True, False, True]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.where(cond) expected = Series([np.nan, 2, np.nan]) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 1582bd119c806..f969619d5acb0 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -394,9 +394,9 @@ def test_setslice(test_data): @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(test_data): # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] - with tm.assert_raises_regex(ValueError, 'tuple-index'): + with pytest.raises(ValueError, match='tuple-index'): test_data.ts[:, 2] - with tm.assert_raises_regex(ValueError, 'tuple-index'): + with pytest.raises(ValueError, match='tuple-index'): test_data.ts[:, 2] = 2 # weird lists. 
[slice(0, 5)] will work but not two slices diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index 00e145680c7a6..79de3dc3be19f 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -80,7 +80,7 @@ def test_rename_axis_supported(self): s = Series(range(5)) s.rename({}, axis=0) s.rename({}, axis='index') - with tm.assert_raises_regex(ValueError, 'No axis named 5'): + with pytest.raises(ValueError, match='No axis named 5'): s.rename({}, axis=5) def test_set_name_attribute(self): @@ -169,7 +169,7 @@ def test_reset_index_level(self): drop=True) tm.assert_frame_equal(result, df[['C']]) - with tm.assert_raises_regex(KeyError, 'Level E '): + with pytest.raises(KeyError, match='Level E '): s.reset_index(level=['A', 'E']) # With single-level Index @@ -184,7 +184,7 @@ def test_reset_index_level(self): result = s.reset_index(level=levels[0], drop=True) tm.assert_series_equal(result, df['B']) - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): s.reset_index(level=[0, 1, 2]) # Check that .reset_index([],drop=True) doesn't fail @@ -241,7 +241,7 @@ def test_rename_axis_mapper(self): result = s.rename_axis(index=['foo', 'goo']) assert result.index.names == ['foo', 'goo'] - with tm.assert_raises_regex(TypeError, 'unexpected'): + with pytest.raises(TypeError, match='unexpected'): s.rename_axis(columns='wrong') def test_rename_axis_inplace(self, datetime_series): @@ -289,7 +289,7 @@ def test_set_axis_inplace(self): # wrong values for the "axis" parameter for axis in [2, 'foo']: - with tm.assert_raises_regex(ValueError, 'No axis named'): + with pytest.raises(ValueError, match='No axis named'): s.set_axis(list('abcd'), axis=axis, inplace=False) def test_set_axis_prior_to_deprecation_signature(self): @@ -308,14 +308,14 @@ def test_reset_index_drop_errors(self): # KeyError raised for series index when passed level name is missing s = 
Series(range(4)) - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): s.reset_index('wrong', drop=True) - with tm.assert_raises_regex(KeyError, 'must be same as name'): + with pytest.raises(KeyError, match='must be same as name'): s.reset_index('wrong') # KeyError raised for series when level to be dropped is missing s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) - with tm.assert_raises_regex(KeyError, 'not found'): + with pytest.raises(KeyError, match='not found'): s.reset_index('wrong', drop=True) def test_droplevel(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index cbcfa629c8928..a5a7cc2217864 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -26,7 +26,7 @@ assert_series_equal) -class TestSeriesAnalytics(): +class TestSeriesAnalytics(object): @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("method, unit", [ @@ -561,8 +561,8 @@ def _check_stat_op(self, name, alternate, string_series_, # Unimplemented numeric_only parameter. 
if 'numeric_only' in compat.signature(f).args: - tm.assert_raises_regex(NotImplementedError, name, f, - string_series_, numeric_only=True) + with pytest.raises(NotImplementedError, match=name): + f(string_series_, numeric_only=True) def _check_accum_op(self, name, datetime_series_, check_dtype=True): func = getattr(np, name) @@ -601,12 +601,12 @@ def test_numpy_compress(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.compress, - cond, s, axis=1) + with pytest.raises(ValueError, match=msg): + np.compress(cond, s, axis=1) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.compress, - cond, s, out=s) + with pytest.raises(ValueError, match=msg): + np.compress(cond, s, out=s) def test_round(self, datetime_series): datetime_series.index.name = "index_name" @@ -624,7 +624,7 @@ def test_numpy_round(self): assert_series_equal(out, expected) msg = "the 'out' parameter is not supported" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): np.round(s, decimals=0, out=s) def test_built_in_round(self): @@ -789,7 +789,7 @@ def test_corr_invalid_method(self): s2 = pd.Series(np.random.randn(10)) msg = ("method must be either 'pearson', 'spearman', " "or 'kendall'") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s1.corr(s2, method="____") def test_corr_callable_method(self, datetime_series): @@ -1254,8 +1254,8 @@ def test_numpy_argmin_deprecated(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmin, - s, out=data) + with pytest.raises(ValueError, match=msg): + np.argmin(s, out=data) def test_idxmax(self, string_series): # test idxmax @@ -1322,8 +1322,8 @@ def test_numpy_argmax_deprecated(self): with 
tm.assert_produces_warning(FutureWarning, check_stacklevel=False): msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.argmax, - s, out=data) + with pytest.raises(ValueError, match=msg): + np.argmax(s, out=data) def test_ptp(self): # GH21614 @@ -1392,7 +1392,8 @@ def test_numpy_repeat(self): assert_series_equal(np.repeat(s, 2), expected) msg = "the 'axis' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.repeat, s, 2, axis=0) + with pytest.raises(ValueError, match=msg): + np.repeat(s, 2, axis=0) def test_searchsorted(self): s = Series([1, 2, 3]) @@ -1929,7 +1930,7 @@ def test_error(self, r): args = 2, len(r), 0, -1 methods = r.nlargest, r.nsmallest for method, arg in product(methods, args): - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): method(arg) def test_nsmallest_nlargest(self, s_main_dtypes_split): @@ -1959,9 +1960,9 @@ def test_misc(self): assert_series_equal(s.nsmallest(), s.iloc[[2, 3, 0, 4]]) msg = 'keep must be either "first", "last"' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.nsmallest(keep='invalid') - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.nlargest(keep='invalid') # GH 15297 diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index bd0d02014dcdb..f944d6f8c9d08 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -457,8 +457,7 @@ def test_str_attribute(self): # str accessor only valid with string values s = Series(range(5)) - with tm.assert_raises_regex(AttributeError, - 'only use .str accessor'): + with pytest.raises(AttributeError, match='only use .str accessor'): s.str.repeat(2) def test_empty_method(self): @@ -525,26 +524,25 @@ def test_cat_accessor_api(self): assert isinstance(s.cat, CategoricalAccessor) invalid = Series([1]) - with tm.assert_raises_regex(AttributeError, - "only use 
.cat accessor"): + with pytest.raises(AttributeError, match="only use .cat accessor"): invalid.cat assert not hasattr(invalid, 'cat') def test_cat_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 c = Series(list('aabbcde')).astype('category') - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): c.cat.xlabel = "a" def test_categorical_delegations(self): # invalid accessor pytest.raises(AttributeError, lambda: Series([1, 2, 3]).cat) - tm.assert_raises_regex( - AttributeError, - r"Can only use .cat accessor with a 'category' dtype", - lambda: Series([1, 2, 3]).cat) + with pytest.raises(AttributeError, + match=(r"Can only use .cat accessor " + r"with a 'category' dtype")): + Series([1, 2, 3]).cat pytest.raises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) pytest.raises(AttributeError, lambda: Series(np.arange(5.)).cat) pytest.raises(AttributeError, @@ -674,9 +672,9 @@ def test_str_accessor_api_for_categorical(self): tm.assert_series_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assert_raises_regex(AttributeError, - "Can only use .str " - "accessor with string"): + msg = "Can only use .str accessor with string" + + with pytest.raises(AttributeError, match=msg): invalid.str assert not hasattr(invalid, 'str') @@ -760,7 +758,8 @@ def test_dt_accessor_api_for_categorical(self): tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype('category') - with tm.assert_raises_regex( - AttributeError, "Can only use .dt accessor with datetimelike"): + msg = "Can only use .dt accessor with datetimelike" + + with pytest.raises(AttributeError, match=msg): invalid.dt assert not hasattr(invalid, 'str') diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 979775633f644..d1d6aa8b51c0d 100644 --- a/pandas/tests/series/test_arithmetic.py +++
b/pandas/tests/series/test_arithmetic.py @@ -82,7 +82,7 @@ def test_add_series_with_period_index(self): tm.assert_series_equal(result, expected) msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" - with tm.assert_raises_regex(IncompatibleFrequency, msg): + with pytest.raises(IncompatibleFrequency, match=msg): ts + ts.asfreq('D', how="end") def test_operators_datetimelike(self): @@ -139,7 +139,7 @@ def test_comparison_flex_basic(self): # msg = 'No axis named 1 for object type' for op in ['eq', 'ne', 'le', 'le', 'gt', 'ge']: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(left, op)(right, axis=1) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index a685eb7e9fbd3..3f137bf686715 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -51,9 +51,9 @@ def test_append_duplicates(self): exp, check_index_type=True) msg = 'Indexes have overlapping values:' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s1.append(s2, verify_integrity=True) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.concat([s1, s2], verify_integrity=True) def test_combine_scalar(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 494321c5190a6..ce0cf0d5c089e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -33,7 +33,7 @@ def test_invalid_dtype(self): msg = 'not understood' invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series([], name='time', dtype=dtype) def test_scalar_conversion(self): @@ -560,19 +560,19 @@ def test_constructor_pass_nan_nat(self): def test_constructor_cast(self): msg = "could not convert 
string to float" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series(["a", "b", "c"], dtype=float) def test_constructor_unsigned_dtype_overflow(self, uint_dtype): # see gh-15832 msg = 'Trying to coerce negative values to unsigned integers' - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): Series([-1], dtype=uint_dtype) def test_constructor_coerce_float_fail(self, any_int_dtype): # see gh-15832 msg = "Trying to coerce float values to integers" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series([1, 2, 3.5], dtype=any_int_dtype) def test_constructor_coerce_float_valid(self, float_dtype): @@ -1162,7 +1162,7 @@ def test_constructor_cant_cast_datetimelike(self, index): # PeriodIndex or PeriodArray type(index).__name__.rstrip("Index") ) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series(index, dtype=float) # ints are ok @@ -1200,7 +1200,7 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype): # see gh-15524, gh-15987 msg = "dtype has no unit. 
Please pass in" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): Series([], dtype=dtype) @pytest.mark.parametrize("dtype,msg", [ @@ -1210,7 +1210,7 @@ def test_constructor_generic_timestamp_no_frequency(self, dtype): def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): # see gh-15524, gh-15987 - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): Series([], dtype=dtype) @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index f3ae2b1e6ad15..b1c92c2b82a56 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -216,7 +216,7 @@ def get_dir(s): # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') - with tm.assert_raises_regex(ValueError, "modifications"): + with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy @@ -314,8 +314,8 @@ def test_dt_namespace_accessor_categorical(self): def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range('20130101', periods=5, freq='D')) - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): s.dt.xlabel = "a" @pytest.mark.parametrize('time_locale', [ @@ -481,7 +481,7 @@ def test_dt_accessor_api(self): Series(np.random.randn(5))]) def test_dt_accessor_invalid(self, ser): # GH#9322 check that series with incorrect dtypes don't have attr - with tm.assert_raises_regex(AttributeError, "only use .dt accessor"): + with pytest.raises(AttributeError, match="only use .dt accessor"): ser.dt assert not hasattr(ser, 'dt') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 75017f2d22794..79b1bc10b9f4b 
100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -68,7 +68,7 @@ def test_astype_cast_nan_inf_int(self, dtype, value): msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' s = Series([value]) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.astype(dtype) @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) @@ -404,7 +404,7 @@ def test_astype_generic_timestamp_no_frequency(self, dtype): s = Series(data) msg = "dtype has no unit. Please pass in" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.astype(dtype) @pytest.mark.parametrize("dtype", np.typecodes['All']) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c38b7c0083a21..dc58b46f90609 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -401,31 +401,31 @@ def test_fillna_categorical_raise(self): data = ['a', np.nan, 'b', np.nan, np.nan] s = Series(Categorical(data, categories=['a', 'b'])) - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna('d') - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna(Series('d')) - with tm.assert_raises_regex(ValueError, - "fill value must be in categories"): + with pytest.raises(ValueError, + match="fill value must be in categories"): s.fillna({1: 'd', 3: 'a'}) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar or ' - 'dict, but you passed a "list"'): + msg = ('"value" parameter must be a scalar or ' + 'dict, but you passed a "list"') + with pytest.raises(TypeError, match=msg): s.fillna(['a', 'b']) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar or ' - 'dict, but 
you passed a "tuple"'): + msg = ('"value" parameter must be a scalar or ' + 'dict, but you passed a "tuple"') + with pytest.raises(TypeError, match=msg): s.fillna(('a', 'b')) - with tm.assert_raises_regex(TypeError, - '"value" parameter must be a scalar, dict ' - 'or Series, but you passed a "DataFrame"'): + msg = ('"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"') + with pytest.raises(TypeError, match=msg): s.fillna(DataFrame({1: ['a'], 3: ['b']})) def test_fillna_nat(self): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 35bd99ff2eda8..77e43a346c824 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -604,23 +604,23 @@ def test_comp_ops_df_compat(self): for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left == right - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left != right - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left < right msg = "Can only compare identically-labeled DataFrame objects" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() == right.to_frame() - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() != right.to_frame() - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): left.to_frame() < right.to_frame() diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 027814c618303..4f462e11e9bb9 100644 --- a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -44,7 +44,7 @@ def test_quantile(self): msg = 'percentiles should all be in the interval 
\\[0, 1\\]' for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.ts.quantile(invalid) def test_quantile_multi(self): diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 40a30cc8cf09a..9772ceecfc7b1 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -185,11 +185,11 @@ def test_rank_categorical(self): # Test invalid values for na_option msg = "na_option must be one of 'keep', 'top', or 'bottom'" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): na_ser.rank(na_option='bad', ascending=False) # invalid type - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): na_ser.rank(na_option=True, ascending=False) # Test with pct=True diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 7efde1fbdd1f5..3a9c210017625 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -77,7 +77,7 @@ def test_replace(self): # make sure that we aren't just masking a TypeError because bools don't # implement indexing - with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): + with pytest.raises(TypeError, match='Cannot compare types .+'): ser.replace([1, 2], [np.nan, 0]) ser = pd.Series([0, 1, 2, 3, 4]) @@ -137,9 +137,9 @@ def test_replace_with_empty_list(self): tm.assert_series_equal(result, expected) # GH 19266 - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): s.replace({np.nan: []}) - with tm.assert_raises_regex(ValueError, "cannot assign mismatch"): + with pytest.raises(ValueError, match="cannot assign mismatch"): s.replace({np.nan: ['dummy', 'alt']}) def test_replace_mixed_types(self): @@ -205,7 +205,7 @@ def test_replace_bool_with_bool(self): def 
test_replace_with_dict_with_bool_keys(self): s = pd.Series([True, False, True]) - with tm.assert_raises_regex(TypeError, 'Cannot compare types .+'): + with pytest.raises(TypeError, match='Cannot compare types .+'): s.replace({'asdf': 'asdb', True: 'yes'}) def test_replace2(self): diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 21f80f181c34d..1681255f7e6bd 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -248,14 +248,16 @@ def test_truncate_nonsortedindex(self): s = pd.Series(['a', 'b', 'c', 'd', 'e'], index=[5, 3, 2, 9, 0]) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + + with pytest.raises(ValueError, match=msg): s.truncate(before=3, after=9) rng = pd.date_range('2011-01-01', '2012-01-01', freq='W') ts = pd.Series(np.random.randn(len(rng)), index=rng) - with tm.assert_raises_regex(ValueError, - 'truncate requires a sorted index'): + msg = 'truncate requires a sorted index' + + with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=False).truncate(before='2011-11', after='2011-12') diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index 3c9701758f12c..bdf5944cab408 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -31,8 +31,9 @@ def test_series_tz_localize(self): # Can't localize if already tz-aware rng = date_range('1/1/2011', periods=100, freq='H', tz='utc') ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, 'Already tz-aware', - ts.tz_localize, 'US/Eastern') + + with pytest.raises(TypeError, match='Already tz-aware'): + ts.tz_localize('US/Eastern') @pytest.mark.filterwarnings('ignore::FutureWarning') def test_tz_localize_errors_deprecation(self): @@ -123,8 +124,9 @@ def test_series_tz_convert(self): # can't convert tz-naive rng = date_range('1/1/2011', periods=200, 
freq='D') ts = Series(1, index=rng) - tm.assert_raises_regex(TypeError, "Cannot convert tz-naive", - ts.tz_convert, 'US/Eastern') + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert('US/Eastern') def test_series_tz_convert_to_utc(self): base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index a6cbb058dbc9d..8f7c16f2c3132 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -1,7 +1,5 @@ import pytest -import pandas.util.testing as tm - class TestSeriesValidate(object): """Tests for error handling related to data types of method arguments.""" @@ -17,5 +15,5 @@ def test_validate_bool_args(self, string_series, func, inplace): if func == "_set_name": kwargs["name"] = "hello" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): getattr(string_series, func)(**kwargs) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 10074a2e5ad99..dd73ec69c3b9a 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -152,10 +152,10 @@ def test_constructor_ndarray(self, float_frame): level=1) # wrong length index / columns - with tm.assert_raises_regex(ValueError, "^Index length"): + with pytest.raises(ValueError, match="^Index length"): SparseDataFrame(float_frame.values, index=float_frame.index[:-1]) - with tm.assert_raises_regex(ValueError, "^Column length"): + with pytest.raises(ValueError, match="^Column length"): SparseDataFrame(float_frame.values, columns=float_frame.columns[:-1]) @@ -638,7 +638,7 @@ def test_set_index(self, float_frame): def test_ctor_reindex(self): idx = pd.Index([0, 1, 2, 3]) - with tm.assert_raises_regex(ValueError, ''): + with pytest.raises(ValueError, match=''): pd.SparseDataFrame({"A": [1, 2]}, index=idx) def test_append(self, float_frame): @@ -870,8 
+870,7 @@ def test_join(self, float_frame): right = float_frame.loc[:, ['B', 'D']] pytest.raises(Exception, left.join, right) - with tm.assert_raises_regex(ValueError, - 'Other Series must have a name'): + with pytest.raises(ValueError, match='Other Series must have a name'): float_frame.join(Series( np.random.randn(len(float_frame)), index=float_frame.index)) @@ -1130,7 +1129,8 @@ def test_numpy_transpose(self): tm.assert_sp_frame_equal(result, sdf) msg = "the 'axes' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) + with pytest.raises(ValueError, match=msg): + np.transpose(sdf, axes=1) def test_combine_first(self, float_frame): df = float_frame @@ -1300,12 +1300,12 @@ def test_numpy_cumsum(self, float_frame): tm.assert_sp_frame_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - float_frame, dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(float_frame, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - float_frame, out=result) + with pytest.raises(ValueError, match=msg): + np.cumsum(float_frame, out=result) def test_numpy_func_call(self, float_frame): # no exception should be raised even though diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 6a5821519866e..9c7dbd85edcbb 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -553,12 +553,12 @@ def test_numpy_take(self): np.take(sp.to_dense(), indices, axis=0)) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, out=np.empty(sp.shape)) + with pytest.raises(ValueError, match=msg): + np.take(sp, indices, out=np.empty(sp.shape)) msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, 
out=None, mode='clip') + with pytest.raises(ValueError, match=msg): + np.take(sp, indices, out=None, mode='clip') def test_setitem(self): self.bseries[5] = 7. @@ -776,9 +776,9 @@ def _check_all(values, first, second): first_series = SparseSeries(values1, sparse_index=IntIndex(length, index1), fill_value=nan) - with tm.assert_raises_regex(TypeError, - 'new index must be a SparseIndex'): - reindexed = first_series.sparse_reindex(0) # noqa + with pytest.raises(TypeError, + match='new index must be a SparseIndex'): + first_series.sparse_reindex(0) def test_repr(self): # TODO: These aren't used @@ -870,7 +870,7 @@ def _check_matches(indices, expected): # must have NaN fill value data = {'a': SparseSeries(np.arange(7), sparse_index=expected2, fill_value=0)} - with tm.assert_raises_regex(TypeError, "NaN fill value"): + with pytest.raises(TypeError, match="NaN fill value"): spf.homogenize(data) def test_fill_value_corner(self): @@ -1444,7 +1444,7 @@ def test_cumsum(self): axis = 1 # Series is 1-D, so only axis = 0 is valid. 
msg = "No axis named {axis}".format(axis=axis) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): self.bseries.cumsum(axis=axis) def test_numpy_cumsum(self): @@ -1457,12 +1457,12 @@ def test_numpy_cumsum(self): tm.assert_series_equal(result, expected) msg = "the 'dtype' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.bseries, dtype=np.int64) + with pytest.raises(ValueError, match=msg): + np.cumsum(self.bseries, dtype=np.int64) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.cumsum, - self.zbseries, out=result) + with pytest.raises(ValueError, match=msg): + np.cumsum(self.zbseries, out=result) def test_numpy_func_call(self): # no exception should be raised even though @@ -1520,7 +1520,7 @@ def test_to_sparse(): def test_constructor_mismatched_raises(): msg = "Length of passed values is 2, index implies 3" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): SparseSeries([1, 2], index=[1, 2, 3]) diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 1ecb1f4e8de58..fb10473ec78a8 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -451,7 +451,7 @@ def tests_indexing_with_sparse(self, kind, fill): msg = ("iLocation based boolean indexing cannot " "use an indexable as a mask") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): s.iloc[indexer] diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d491df587fb4a..3642c4ee98a9e 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1328,7 +1328,7 @@ def test_vector_resize(self, writable, htable, uniques, dtype, if safely_resizes: htable.get_labels(vals, uniques, 0, -1) else: - with tm.assert_raises_regex(ValueError, 'external reference.*'): + with pytest.raises(ValueError, match='external 
reference.*'): htable.get_labels(vals, uniques, 0, -1) uniques.to_array() # should not raise here @@ -1459,7 +1459,7 @@ def test_too_many_ndims(self): arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) msg = "Array with ndim > 2 are not supported" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): algos.rank(arr) @@ -1664,27 +1664,27 @@ def test_int64_add_overflow(): m = np.iinfo(np.int64).max n = np.iinfo(np.int64).min - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), m) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([n, n]), n) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]), b_mask=np.array([False, True])) - with tm.assert_raises_regex(OverflowError, msg): + with pytest.raises(OverflowError, match=msg): with 
tm.assert_produces_warning(RuntimeWarning): algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) @@ -1692,19 +1692,13 @@ def test_int64_add_overflow(): # Check that the nan boolean arrays override whether or not # the addition overflows. We don't check the result but just # the fact that an OverflowError is not raised. - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - arr_mask=np.array([True, True])) - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - b_mask=np.array([True, True])) - with pytest.raises(AssertionError): - with tm.assert_raises_regex(OverflowError, msg): - algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), - arr_mask=np.array([True, False]), - b_mask=np.array([False, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + arr_mask=np.array([True, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + b_mask=np.array([True, True])) + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m]), + arr_mask=np.array([True, False]), + b_mask=np.array([False, True])) class TestMode(object): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 07d357b70f94b..084477d8202b1 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -47,9 +47,9 @@ class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # Pass whatever function you normally would to assert_raises_regex + # Pass whatever function you normally would to pytest.raises # (after the Exception kind). 
- tm.assert_raises_regex( + pytest.raises( TypeError, self.mutable_regex, *args, **kwargs) def test_no_mutable_funcs(self): @@ -848,9 +848,9 @@ def test_duplicated_drop_duplicates_index(self): result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) - with tm.assert_raises_regex( - TypeError, r"drop_duplicates\(\) got an unexpected " - "keyword argument"): + with pytest.raises(TypeError, + match=(r"drop_duplicates\(\) got an " + r"unexpected keyword argument")): idx.drop_duplicates(inplace=True) else: @@ -1036,10 +1036,10 @@ def test_transpose(self): def test_transpose_non_default_axes(self): for obj in self.objs: - tm.assert_raises_regex(ValueError, self.errmsg, - obj.transpose, 1) - tm.assert_raises_regex(ValueError, self.errmsg, - obj.transpose, axes=1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(axes=1) def test_numpy_transpose(self): for obj in self.objs: @@ -1048,8 +1048,8 @@ def test_numpy_transpose(self): else: tm.assert_series_equal(np.transpose(obj), obj) - tm.assert_raises_regex(ValueError, self.errmsg, - np.transpose, obj, axes=1) + with pytest.raises(ValueError, match=self.errmsg): + np.transpose(obj, axes=1) class TestNoNewAttributesMixin(object): diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index fbc0faa4c929a..c5ea69b5ec46f 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -4,7 +4,6 @@ import pandas # noqa import pandas as pd from pandas.errors import AbstractMethodError -import pandas.util.testing as tm @pytest.mark.parametrize( @@ -62,13 +61,13 @@ def method(self): def test_AbstractMethodError_classmethod(): xpr = "This classmethod must be defined in the concrete class Foo" - with tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo.classmethod() xpr = "This property must be defined in the concrete class Foo" - with 
tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo().property xpr = "This method must be defined in the concrete class Foo" - with tm.assert_raises_regex(AbstractMethodError, xpr): + with pytest.raises(AbstractMethodError, match=xpr): Foo().method() diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index a7b9bf9c9a351..4a61ce930cbab 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -379,22 +379,22 @@ def test_bool_ops_raise_on_arithmetic(self): f = getattr(operator, name) err_msg = re.escape(msg % op) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df, df) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df.a, df.b) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df.a, True) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(False, df.a) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(False, df) - with tm.assert_raises_regex(NotImplementedError, err_msg): + with pytest.raises(NotImplementedError, match=err_msg): f(df, True) def test_bool_ops_warn_on_arithmetic(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9829c04ea108f..2717b92e05a29 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -325,7 +325,7 @@ def test_frame_getitem_setitem_boolean(self): np.putmask(values[:-1], values[:-1] < 0, 2) tm.assert_almost_equal(df.values, values) - with tm.assert_raises_regex(TypeError, 'boolean values only'): + with pytest.raises(TypeError, match='boolean values only'): df[df * 0] = 2 def 
test_frame_getitem_setitem_slice(self): @@ -772,8 +772,8 @@ def _check_counts(frame, axis=0): # can't call with level on regular DataFrame df = tm.makeTimeDataFrame() - tm.assert_raises_regex( - TypeError, 'hierarchical', df.count, level=0) + with pytest.raises(TypeError, match='hierarchical'): + df.count(level=0) self.frame['D'] = 'foo' result = self.frame.count(level=0, numeric_only=True) @@ -809,10 +809,9 @@ def test_count_level_corner(self): tm.assert_frame_equal(result, expected) def test_get_level_number_out_of_bounds(self): - with tm.assert_raises_regex(IndexError, "Too many levels"): + with pytest.raises(IndexError, match="Too many levels"): self.frame.index._get_level_number(2) - with tm.assert_raises_regex(IndexError, - "not a valid level number"): + with pytest.raises(IndexError, match="not a valid level number"): self.frame.index._get_level_number(-3) def test_unstack(self): @@ -1029,17 +1028,16 @@ def test_stack_names_and_numbers(self): unstacked = self.ymd.unstack(['year', 'month']) # Can't use mixture of names and numbers to stack - with tm.assert_raises_regex(ValueError, "level should contain"): + with pytest.raises(ValueError, match="level should contain"): unstacked.stack([0, 'month']) def test_stack_multiple_out_of_bounds(self): # nlevels == 3 unstacked = self.ymd.unstack(['year', 'month']) - with tm.assert_raises_regex(IndexError, "Too many levels"): + with pytest.raises(IndexError, match="Too many levels"): unstacked.stack([2, 3]) - with tm.assert_raises_regex(IndexError, - "not a valid level number"): + with pytest.raises(IndexError, match="not a valid level number"): unstacked.stack([-4, -3]) def test_unstack_period_series(self): @@ -1327,10 +1325,10 @@ def test_reorder_levels(self): expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) tm.assert_frame_equal(result, expected) - with tm.assert_raises_regex(TypeError, 'hierarchical axis'): + with pytest.raises(TypeError, match='hierarchical axis'): self.ymd.reorder_levels([1, 
2], axis=1) - with tm.assert_raises_regex(IndexError, 'Too many levels'): + with pytest.raises(IndexError, match='Too many levels'): self.ymd.index.reorder_levels([1, 2, 3]) def test_insert_index(self): @@ -2351,9 +2349,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, df) # gh-16120: already existing column - with tm.assert_raises_regex(ValueError, - (r"cannot insert \('A', ''\), " - "already exists")): + with pytest.raises(ValueError, + match=(r"cannot insert \('A', ''\), " + "already exists")): df.rename_axis('A').reset_index() # gh-16164: multiindex (tuple) full key @@ -2368,9 +2366,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, expected) # with index name which is a too long tuple... - with tm.assert_raises_regex(ValueError, - ("Item must have length equal to number " - "of levels.")): + with pytest.raises(ValueError, + match=("Item must have length equal " + "to number of levels.")): df.rename_axis([('C', 'c', 'i')]).reset_index() # or too short... @@ -2384,9 +2382,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, expected) # ... which is incompatible with col_fill=None - with tm.assert_raises_regex(ValueError, - ("col_fill=None is incompatible with " - r"incomplete column name \('C', 'c'\)")): + with pytest.raises(ValueError, + match=("col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)")): df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) # with col_level != 0 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 775fcc2684f42..bc644071e914f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -177,8 +177,8 @@ def wrapper(x): # Unimplemented numeric_only parameter. 
if 'numeric_only' in signature(f).args: - tm.assert_raises_regex(NotImplementedError, name, f, - numeric_only=True) + with pytest.raises(NotImplementedError, match=name): + f(numeric_only=True) @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") @@ -221,10 +221,10 @@ def test_get_axis_number(self): assert self.panel._get_axis_number('major') == 1 assert self.panel._get_axis_number('minor') == 2 - with tm.assert_raises_regex(ValueError, "No axis named foo"): + with pytest.raises(ValueError, match="No axis named foo"): self.panel._get_axis_number('foo') - with tm.assert_raises_regex(ValueError, "No axis named foo"): + with pytest.raises(ValueError, match="No axis named foo"): self.panel.__ge__(self.panel, axis='foo') def test_get_axis_name(self): @@ -502,10 +502,9 @@ def test_setitem(self): # bad shape p = Panel(np.random.randn(4, 3, 2)) - with tm.assert_raises_regex(ValueError, - r"shape of value must be " - r"\(3, 2\), shape of given " - r"object was \(4, 2\)"): + msg = (r"shape of value must be \(3, 2\), " + r"shape of given object was \(4, 2\)") + with pytest.raises(ValueError, match=msg): p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): @@ -853,9 +852,8 @@ def test_get_value(self): assert_almost_equal(result, expected) with catch_warnings(): simplefilter("ignore", FutureWarning) - with tm.assert_raises_regex(TypeError, - "There must be an argument " - "for each axis"): + msg = "There must be an argument for each axis" + with pytest.raises(TypeError, match=msg): self.panel.get_value('a') def test_set_value(self): @@ -880,7 +878,7 @@ def test_set_value(self): msg = ("There must be an argument for each " "axis plus the value provided") - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): self.panel.set_value('a') @@ -1015,7 +1013,8 @@ def _check_dtype(panel, dtype): _check_dtype(panel, dtype) def test_constructor_fails_with_not_3d_input(self): - with tm.assert_raises_regex(ValueError, "The number of 
dimensions required is 3"): # noqa + msg = "The number of dimensions required is 3" + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(10, 2)) def test_consolidate(self): @@ -1144,36 +1143,24 @@ def test_from_dict_mixed_orient(self): assert panel['A'].values.dtype == np.float64 def test_constructor_error_msgs(self): - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(4, 5, 5\)") + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(4, 5, 5\)", - testit) - - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 4, 5\)") + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 4, 5\)", - testit) - - def testit(): + msg = (r"Shape of passed values is \(3, 4, 5\), " + r"indices imply \(5, 5, 4\)") + with pytest.raises(ValueError, match=msg): Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 5, 4\)", - testit) - def test_conform(self): df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) conformed = self.panel.conform(df) @@ -1634,12 +1621,12 @@ def test_transpose(self): assert_panel_equal(result, expected) # duplicate axes - with tm.assert_raises_regex(TypeError, - 'not enough/duplicate arguments'): + with pytest.raises(TypeError, + match='not enough/duplicate arguments'): self.panel.transpose('minor', maj='major', minor='items') - with tm.assert_raises_regex(ValueError, - 'repeated axis in transpose'): + with pytest.raises(ValueError, + match='repeated axis in transpose'): self.panel.transpose('minor', 'major', major='minor', 
minor='items') @@ -1833,8 +1820,9 @@ def test_to_panel_duplicates(self): # #2441 df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) idf = df.set_index(['a', 'b']) - tm.assert_raises_regex( - ValueError, 'non-uniquely indexed', idf.to_panel) + + with pytest.raises(ValueError, match='non-uniquely indexed'): + idf.to_panel() def test_panel_dups(self): @@ -1954,8 +1942,8 @@ def test_tshift(self): shifted3 = ps.tshift(freq=BDay()) assert_panel_equal(shifted, shifted3) - tm.assert_raises_regex(ValueError, 'does not match', - ps.tshift, freq='M') + with pytest.raises(ValueError, match='does not match'): + ps.tshift(freq='M') # DatetimeIndex panel = make_test_panel() @@ -2067,7 +2055,8 @@ def test_numpy_round(self): assert_panel_equal(expected, result) msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) + with pytest.raises(ValueError, match=msg): + np.round(p, out=p) # removing Panel before NumPy enforces, so just ignore @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") @@ -2493,8 +2482,8 @@ def test_to_string(self): def test_to_sparse(self): if isinstance(self.panel, Panel): msg = 'sparsifying is not supported' - tm.assert_raises_regex(NotImplementedError, msg, - self.panel.to_sparse) + with pytest.raises(NotImplementedError, match=msg): + self.panel.to_sparse def test_truncate(self): dates = self.panel.index.levels[0] diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py index 33b9798b7606a..acc18ed7ad049 100644 --- a/pandas/tests/test_register_accessor.py +++ b/pandas/tests/test_register_accessor.py @@ -85,5 +85,5 @@ class Bad(object): def __init__(self, data): raise AttributeError("whoops") - with tm.assert_raises_regex(AttributeError, "whoops"): + with pytest.raises(AttributeError, match="whoops"): pd.Series([]).bad diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ed29e20fd5ca5..756385f0cfb56 100644 --- 
a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -158,7 +158,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['D']) pytest.raises(KeyError, g.__getitem__, ['A', 'D']) - with tm.assert_raises_regex(KeyError, '^[^A]+$'): + with pytest.raises(KeyError, match='^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! g[['A', 'D']] @@ -940,11 +940,10 @@ def test_numpy_compat(self): for func in ('min', 'max', 'sum', 'prod', 'mean', 'var', 'std'): - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), - func, 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, func), axis=1) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(func, 1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(axis=1) def test_resample_how_callables(self): # GH 7929 @@ -3308,11 +3307,10 @@ def test_fails_on_no_datetime_index(self): for name, func in zip(index_names, index_funcs): index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) - with tm.assert_raises_regex(TypeError, - "Only valid with " - "DatetimeIndex, TimedeltaIndex " - "or PeriodIndex, but got an " - "instance of %r" % name): + + msg = ("Only valid with DatetimeIndex, TimedeltaIndex " + "or PeriodIndex, but got an instance of %r" % name) + with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D')) def test_aaa_group_order(self): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index aa5d0016eca95..22e758a0e59a7 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -423,14 +423,14 @@ def test_unsortable(self): pytest.raises(TypeError, safe_sort, arr) def test_exceptions(self): - with tm.assert_raises_regex(TypeError, - "Only list-like objects are allowed"): + with pytest.raises(TypeError, + match="Only list-like objects are allowed"): safe_sort(values=1) - 
with tm.assert_raises_regex(TypeError, - "Only list-like objects or None"): + with pytest.raises(TypeError, + match="Only list-like objects or None"): safe_sort(values=[0, 1, 2], labels=1) - with tm.assert_raises_regex(ValueError, - "values should be unique"): + with pytest.raises(ValueError, + match="values should be unique"): safe_sort(values=[0, 1, 2, 1], labels=[0, 1]) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f0873eb7683e9..7cd9182b4dff4 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -36,8 +36,7 @@ def test_api(self): # GH 9184 invalid = Series([1]) - with tm.assert_raises_regex(AttributeError, - "only use .str accessor"): + with pytest.raises(AttributeError, match="only use .str accessor"): invalid.str assert not hasattr(invalid, 'str') @@ -143,13 +142,13 @@ def test_str_cat(self, box): rgx = 'All arrays must be same length, except those having an index.*' z = Series(['1', '2', '3']) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z.values) - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(list(z)) @pytest.mark.parametrize('box', [Series, Index]) @@ -157,9 +156,9 @@ def test_str_cat_raises_intuitive_error(self, box): # GH 11334 s = box(['a', 'b', 'c', 'd']) message = "Did you mean to supply a `sep` keyword?" 
- with tm.assert_raises_regex(ValueError, message): + with pytest.raises(ValueError, match=message): s.str.cat('|') - with tm.assert_raises_regex(ValueError, message): + with pytest.raises(ValueError, match=message): s.str.cat(' ') @pytest.mark.parametrize('sep', ['', None]) @@ -262,23 +261,23 @@ def test_str_cat_mixed_inputs(self, box): e = concat([z, z], axis=1) # DataFrame - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(e) # two-dimensional ndarray - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(e.values) # list of Series - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z, s]) # list of list-likes - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z.values, s.values]) # mixed list of Series/list-like - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([z.values, s]) # errors for incorrect arguments in list-like @@ -287,33 +286,33 @@ def test_str_cat_mixed_inputs(self, box): u = Series(['a', np.nan, 'c', None]) # mix of string and Series - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, 'u']) # DataFrame in list - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, d]) # 2-dim ndarray in list - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, d.values]) # nested lists - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat([u, [u, d]]) # forbidden input type: set # GH 23009 - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat(set(u)) # forbidden input type: set in list # GH 23009 - with tm.assert_raises_regex(TypeError, rgx): + with 
pytest.raises(TypeError, match=rgx): s.str.cat([u, set(u)]) # other forbidden input type, e.g. int - with tm.assert_raises_regex(TypeError, rgx): + with pytest.raises(TypeError, match=rgx): s.str.cat(1) @pytest.mark.parametrize('join', ['left', 'outer', 'inner', 'right']) @@ -371,17 +370,17 @@ def test_str_cat_align_mixed_inputs(self, join): z = Series(['1', '2', '3']).values # unindexed object of wrong length - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat(z, join=join) # unindexed object of wrong length in list - with tm.assert_raises_regex(ValueError, rgx): + with pytest.raises(ValueError, match=rgx): s.str.cat([t, z], join=join) def test_str_cat_raises(self): # non-strings hiding behind object dtype s = Series([1, 2, 3, 4], dtype='object') - with tm.assert_raises_regex(TypeError, "unsupported operand type.*"): + with pytest.raises(TypeError, match="unsupported operand type.*"): s.str.cat(s) def test_str_cat_special_cases(self): @@ -739,15 +738,15 @@ def test_replace_callable(self): r'(?(3)required )positional arguments?') repl = lambda: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) repl = lambda m, x: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) repl = lambda m, x, y=None: None - with tm.assert_raises_regex(TypeError, p_err): + with pytest.raises(TypeError, match=p_err): values.str.replace('a', repl) # test regex named groups @@ -800,16 +799,16 @@ def test_replace_compiled_regex(self): values = Series(['fooBAD__barBAD__bad', NA]) pat = re.compile(r'BAD[_]*') - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', flags=re.IGNORECASE) - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with 
pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', case=False) - with tm.assert_raises_regex(ValueError, - "case and flags cannot be"): + with pytest.raises(ValueError, + match="case and flags cannot be"): result = values.str.replace(pat, '', case=True) # test with callable @@ -908,8 +907,8 @@ def test_match(self): def test_extract_expand_None(self): values = Series(['fooBAD__barBAD', NA, 'foo']) - with tm.assert_raises_regex(ValueError, - 'expand must be True or False'): + with pytest.raises(ValueError, + match='expand must be True or False'): values.str.extract('.*(BAD[_]+).*(BAD)', expand=None) def test_extract_expand_unspecified(self): @@ -948,7 +947,7 @@ def test_extract_expand_False(self): # Index only works with one regex group since # multi-group would expand to a frame idx = Index(['A1', 'A2', 'A3', 'A4', 'B5']) - with tm.assert_raises_regex(ValueError, "supported"): + with pytest.raises(ValueError, match="supported"): idx.str.extract('([AB])([123])', expand=False) # these should work for both Series and Index @@ -1446,7 +1445,7 @@ def test_extractall_errors(self): # no capture groups. 
(it returns DataFrame with one column for # each capture group) s = Series(['a3', 'b3', 'd4c2'], name='series_name') - with tm.assert_raises_regex(ValueError, "no capture groups"): + with pytest.raises(ValueError, match="no capture groups"): s.str.extractall(r'[a-z]') def test_extract_index_one_two_groups(self): @@ -1817,12 +1816,12 @@ def test_find(self): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assert_raises_regex(TypeError, - "expected a string object, not int"): + with pytest.raises(TypeError, + match="expected a string object, not int"): result = values.str.find(0) - with tm.assert_raises_regex(TypeError, - "expected a string object, not int"): + with pytest.raises(TypeError, + match="expected a string object, not int"): result = values.str.rfind(0) def test_find_nan(self): @@ -1892,13 +1891,11 @@ def _check(result, expected): dtype=np.int64) tm.assert_numpy_array_equal(result.values, expected) - with tm.assert_raises_regex(ValueError, - "substring not found"): + with pytest.raises(ValueError, match="substring not found"): result = s.str.index('DE') - with tm.assert_raises_regex(TypeError, - "expected a string " - "object, not int"): + msg = "expected a string object, not int" + with pytest.raises(TypeError, match=msg): result = s.str.index(0) # test with nan @@ -1982,25 +1979,22 @@ def test_pad_fillchar(self): exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): + msg = "fillchar must be a character, not str" + with pytest.raises(TypeError, match=msg): result = values.str.pad(5, fillchar='XY') - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): + msg = "fillchar must be a character, not int" + with pytest.raises(TypeError, match=msg): result = values.str.pad(5, fillchar=5) - def test_pad_width(self): - # GH 13598 + @pytest.mark.parametrize("f", 
['center', 'ljust', 'rjust', 'zfill', 'pad']) + def test_pad_width(self, f): + # see gh-13598 s = Series(['1', '22', 'a', 'bb']) + msg = "width must be of integer type, not*" - for f in ['center', 'ljust', 'rjust', 'zfill', 'pad']: - with tm.assert_raises_regex(TypeError, - "width must be of " - "integer type, not*"): - getattr(s.str, f)('f') + with pytest.raises(TypeError, match=msg): + getattr(s.str, f)('f') def test_translate(self): @@ -2031,8 +2025,8 @@ def _check(result, expected): expected = klass(['abcde', 'abcc', 'cddd', 'cde']) _check(result, expected) else: - with tm.assert_raises_regex( - ValueError, "deletechars is not a valid argument"): + msg = "deletechars is not a valid argument" + with pytest.raises(ValueError, match=msg): result = s.str.translate(table, deletechars='fg') # Series with non-string values @@ -2120,35 +2114,25 @@ def test_center_ljust_rjust_fillchar(self): # If fillchar is not a charatter, normal str raises TypeError # 'aaa'.ljust(5, 'XY') # TypeError: must be char, not str - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.center(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.ljust(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not str"): - result = values.str.rjust(5, fillchar='XY') - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.center(5, fillchar=1) - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.ljust(5, fillchar=1) - - with tm.assert_raises_regex(TypeError, - "fillchar must be a " - "character, not int"): - result = values.str.rjust(5, fillchar=1) + template = "fillchar must be a character, not {dtype}" + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.center(5, 
fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.ljust(5, fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.rjust(5, fillchar='XY') + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.center(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.ljust(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.rjust(5, fillchar=1) def test_zfill(self): values = Series(['1', '22', 'aaa', '333', '45678']) @@ -2342,7 +2326,7 @@ def test_split_to_dataframe(self): index=['preserve', 'me']) tm.assert_frame_equal(result, exp) - with tm.assert_raises_regex(ValueError, "expand must be"): + with pytest.raises(ValueError, match="expand must be"): s.str.split('_', expand="not_a_boolean") def test_split_to_multiindex_expand(self): @@ -2367,7 +2351,7 @@ def test_split_to_multiindex_expand(self): tm.assert_index_equal(result, exp) assert result.nlevels == 6 - with tm.assert_raises_regex(ValueError, "expand must be"): + with pytest.raises(ValueError, match="expand must be"): idx.str.split('_', expand="not_a_boolean") def test_rsplit_to_dataframe_expand(self): @@ -3038,8 +3022,7 @@ def test_normalize(self): result = s.str.normalize('NFC') tm.assert_series_equal(result, expected) - with tm.assert_raises_regex(ValueError, - "invalid normalization form"): + with pytest.raises(ValueError, match="invalid normalization form"): s.str.normalize('xxx') s = Index([u'ABC', u'123', u'アイエ']) @@ -3082,9 +3065,9 @@ def test_index_str_accessor_visibility(self): for values, tp in cases: idx = Index(values) message = 'Can only use .str accessor with string values' - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, match=message): Series(values).str - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, 
match=message): idx.str assert idx.inferred_type == tp @@ -3092,14 +3075,14 @@ def test_index_str_accessor_visibility(self): idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')]) assert idx.inferred_type == 'mixed' message = 'Can only use .str accessor with Index, not MultiIndex' - with tm.assert_raises_regex(AttributeError, message): + with pytest.raises(AttributeError, match=message): idx.str def test_str_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(list('aabbcde')) - with tm.assert_raises_regex(AttributeError, - "You cannot add any new attribute"): + with pytest.raises(AttributeError, + match="You cannot add any new attribute"): s.str.xlabel = "a" def test_method_on_bytes(self): diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index ade847923c083..69150ee3c5454 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -90,7 +90,7 @@ def test_1d_with_out(self, dtype_can_hold_na, writeable): expected[3] = np.nan tm.assert_almost_equal(out, expected) else: - with tm.assert_raises_regex(TypeError, self.fill_error): + with pytest.raises(TypeError, match=self.fill_error): algos.take_1d(data, indexer, out=out) # No Exception otherwise. @@ -146,8 +146,7 @@ def test_2d_with_out(self, dtype_can_hold_na, writeable): tm.assert_almost_equal(out1, expected1) else: for i, out in enumerate([out0, out1]): - with tm.assert_raises_regex(TypeError, - self.fill_error): + with pytest.raises(TypeError, match=self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # No Exception otherwise. @@ -226,8 +225,7 @@ def test_3d_with_out(self, dtype_can_hold_na): tm.assert_almost_equal(out2, expected2) else: for i, out in enumerate([out0, out1, out2]): - with tm.assert_raises_regex(TypeError, - self.fill_error): + with pytest.raises(TypeError, match=self.fill_error): algos.take_nd(data, indexer, out=out, axis=i) # No Exception otherwise. 
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index c7cd04deac6c8..31ea5c11f5bd1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -90,7 +90,7 @@ def test_select_bad_cols(self): pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']] pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']] - with tm.assert_raises_regex(KeyError, '^[^A]+$'): + with pytest.raises(KeyError, match='^[^A]+$'): # A should not be referenced as a bad column... # will have to rethink regex if you change message! g[['A', 'C']] @@ -116,7 +116,7 @@ def test_skip_sum_object_raises(self): df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - with tm.assert_raises_regex(TypeError, 'cannot handle this type'): + with pytest.raises(TypeError, match='cannot handle this type'): r.sum() def test_agg(self): @@ -410,10 +410,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(w, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(w, method)(dtype=np.float64) class TestRolling(Base): @@ -507,10 +507,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(r, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) @@ -686,10 +686,10 @@ def test_numpy_compat(self, method): msg = 
"numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) @pytest.mark.parametrize( 'expander', @@ -812,10 +812,10 @@ def test_numpy_compat(self, method): msg = "numpy operations are not valid with window objects" - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), 1, 2, 3) - tm.assert_raises_regex(UnsupportedFunctionCall, msg, - getattr(e, method), dtype=np.float64) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) # gh-12373 : rolling functions error on float32 data @@ -1999,12 +1999,12 @@ def test_no_pairwise_with_other(self, f): tm.assert_index_equal(result.index, expected_index) tm.assert_index_equal(result.columns, expected_columns) else: - tm.assert_raises_regex( - ValueError, "'arg1' columns are not unique", f, df, - self.df2) - tm.assert_raises_regex( - ValueError, "'arg2' columns are not unique", f, - self.df2, df) + with pytest.raises(ValueError, + match="'arg1' columns are not unique"): + f(df, self.df2) + with pytest.raises(ValueError, + match="'arg2' columns are not unique"): + f(self.df2, df) @pytest.mark.parametrize( 'f', [lambda x, y: x.expanding().cov(y), diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 43c7d0951bf6c..a0a1364f4617e 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -54,7 +54,7 @@ def test_series_numeric(self): def test_error(self): s = pd.Series([1, -3.14, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with 
tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -67,13 +67,13 @@ def test_error(self): s = pd.Series(['orange', 1, -3.14, 'apple']) msg = 'Unable to parse string "orange" at position 0' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') def test_error_seen_bool(self): s = pd.Series([True, False, 'apple']) msg = 'Unable to parse string "apple" at position 2' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors='raise') res = to_numeric(s, errors='ignore') @@ -166,7 +166,7 @@ def test_type_check(self, errors): # see gh-11776 df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]}) kwargs = dict(errors=errors) if errors is not None else dict() - error_ctx = tm.assert_raises_regex(TypeError, "1-d array") + error_ctx = pytest.raises(TypeError, match="1-d array") with error_ctx: to_numeric(df, **kwargs) @@ -269,7 +269,7 @@ def test_non_hashable(self): res = pd.to_numeric(s, errors='ignore') tm.assert_series_equal(res, pd.Series([[10.0, 2], 1.0, 'apple'])) - with tm.assert_raises_regex(TypeError, "Invalid object type"): + with pytest.raises(TypeError, match="Invalid object type"): pd.to_numeric(s) @pytest.mark.parametrize("data", [ @@ -283,7 +283,7 @@ def test_downcast_basic(self, data): invalid_downcast = "unsigned-integer" msg = "invalid downcasting method provided" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): pd.to_numeric(data, downcast=invalid_downcast) expected = np.array([1, 2, 3], dtype=np.int64) @@ -436,5 +436,5 @@ def test_coerce_uint64_conflict(self): tm.assert_series_equal(result, s) msg = "Unable to parse string" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): to_numeric(s, errors="raise") diff --git 
a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index 223298dc42544..2f17a61917320 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -7,8 +7,6 @@ from dateutil.relativedelta import relativedelta import pytest -import pandas.util.testing as tm - from pandas import Timestamp from pandas.tseries.frequencies import get_offset from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG @@ -44,9 +42,9 @@ def test_get_offset_name(): def test_get_offset(): - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): get_offset('gibberish') - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): get_offset('QS-JAN-B') pairs = [ diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index f90c8e449f92c..a8def56aa06d4 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -105,8 +105,7 @@ def test_to_offset_multiple(self): assert (result == expected) # malformed - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2h20m'): + with pytest.raises(ValueError, match='Invalid frequency: 2h20m'): frequencies.to_offset('2h20m') def test_to_offset_negative(self): @@ -128,23 +127,17 @@ def test_to_offset_negative(self): def test_to_offset_invalid(self): # GH 13930 - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: U1'): + with pytest.raises(ValueError, match='Invalid frequency: U1'): frequencies.to_offset('U1') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -U'): + with pytest.raises(ValueError, match='Invalid frequency: -U'): frequencies.to_offset('-U') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 3U1'): + with pytest.raises(ValueError, match='Invalid frequency: 3U1'): 
frequencies.to_offset('3U1') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -2-3U'): + with pytest.raises(ValueError, match='Invalid frequency: -2-3U'): frequencies.to_offset('-2-3U') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: -2D:3H'): + with pytest.raises(ValueError, match='Invalid frequency: -2D:3H'): frequencies.to_offset('-2D:3H') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 1.5.0S'): + with pytest.raises(ValueError, match='Invalid frequency: 1.5.0S'): frequencies.to_offset('1.5.0S') # split offsets with spaces are valid @@ -157,11 +150,9 @@ def test_to_offset_invalid(self): # special cases assert frequencies.to_offset('2SMS-15') == offsets.SemiMonthBegin(2) - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2SMS-15-15'): + with pytest.raises(ValueError, match='Invalid frequency: 2SMS-15-15'): frequencies.to_offset('2SMS-15-15') - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: 2SMS-15D'): + with pytest.raises(ValueError, match='Invalid frequency: 2SMS-15D'): frequencies.to_offset('2SMS-15D') def test_to_offset_leading_zero(self): @@ -183,7 +174,7 @@ def test_to_offset_leading_plus(self): assert (result.n == 150) for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']: - with tm.assert_raises_regex(ValueError, 'Invalid frequency:'): + with pytest.raises(ValueError, match='Invalid frequency:'): frequencies.to_offset(bad_freq) def test_to_offset_pd_timedelta(self): @@ -270,8 +261,7 @@ def test_anchored_shortcuts(self): 'SMS-BAR', 'SMS-BYR' 'BSMS', 'SMS--2'] for invalid_anchor in invalid_anchors: - with tm.assert_raises_regex(ValueError, - 'Invalid frequency: '): + with pytest.raises(ValueError, match='Invalid frequency: '): frequencies.to_offset(invalid_anchor) @@ -464,13 +454,13 @@ def test_frequency_misc(self): expected = offsets.Minute(5) assert result == expected - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid 
frequency'): frequencies.get_freq_code((5, 'baz')) - with tm.assert_raises_regex(ValueError, 'Invalid frequency'): + with pytest.raises(ValueError, match='Invalid frequency'): frequencies.to_offset('100foo') - with tm.assert_raises_regex(ValueError, 'Could not evaluate'): + with pytest.raises(ValueError, match='Could not evaluate'): frequencies.to_offset(('', '')) @@ -799,8 +789,8 @@ def test_legacy_offset_warnings(self): msg = INVALID_FREQ_ERR_MSG for freq in freqs: - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): frequencies.get_offset(freq) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): date_range('2011-01-01', periods=5, freq=freq) diff --git a/pandas/tests/tslibs/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py index f4083dfb2bd1c..18840fe1fd9b9 100644 --- a/pandas/tests/tslibs/test_libfrequencies.py +++ b/pandas/tests/tslibs/test_libfrequencies.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import pandas.util.testing as tm +import pytest from pandas.tseries import offsets from pandas._libs.tslibs.frequencies import (get_rule_month, @@ -14,7 +14,7 @@ def assert_aliases_deprecated(freq, expected, aliases): assert (_period_str_to_code(freq) == expected) for alias in aliases: - with tm.assert_raises_regex(ValueError, INVALID_FREQ_ERR_MSG): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): _period_str_to_code(alias) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 466a22e5916e9..2762fb9cbe000 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -60,13 +60,13 @@ def test_does_not_convert_mixed_integer(self): def test_parsers_quarterly_with_freq(self): msg = ('Incorrect quarterly string is given, quarter ' 'must be between 1 and 4: 2013Q5') - with tm.assert_raises_regex(parsing.DateParseError, msg): + with pytest.raises(parsing.DateParseError, match=msg): 
parsing.parse_time_string('2013Q5') # GH 5418 msg = ('Unable to retrieve month information from given freq: ' 'INVLD-L-DEC-SAT') - with tm.assert_raises_regex(parsing.DateParseError, msg): + with pytest.raises(parsing.DateParseError, match=msg): parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') cases = {('2013Q2', None): datetime(2013, 4, 1), diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 11dd2e98adda2..9f5b4f7b90d9f 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -47,7 +47,7 @@ def test_hash_array_mixed(self): @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_array_errors(self, val): msg = 'must pass a ndarray-like' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): hash_array(val) def check_equal(self, obj, **kwargs): @@ -104,7 +104,7 @@ def test_hash_scalar(self, val): @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_tuples_err(self, val): msg = 'must be convertible to a list-of-tuples' - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): hash_tuples(val) def test_multiindex_unique(self): @@ -238,7 +238,7 @@ def test_hash_keys(self): def test_invalid_key(self): # this only matters for object dtypes msg = 'key should be a 16-byte string encoded' - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): hash_pandas_object(Series(list('abc')), hash_key='foo') def test_alread_encoded(self): diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index c10ad72d39f8e..a886579ee913f 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -6,6 +6,7 @@ import numpy as np import pytest +from pandas.compat import raise_with_traceback import pandas.util._test_decorators as td import pandas as pd @@ -13,7 +14,7 @@ import pandas.util.testing as 
tm from pandas.util.testing import ( RNGContext, assert_almost_equal, assert_frame_equal, assert_index_equal, - assert_numpy_array_equal, assert_series_equal, raise_with_traceback) + assert_numpy_array_equal, assert_series_equal) class TestAssertAlmostEqual(object): @@ -152,13 +153,13 @@ def test_assert_almost_equal_object(self): class TestUtilTesting(object): def test_raise_with_traceback(self): - with tm.assert_raises_regex(LookupError, "error_text"): + with pytest.raises(LookupError, match="error_text"): try: raise ValueError("THIS IS AN ERROR") except ValueError as e: e = LookupError("error_text") raise_with_traceback(e) - with tm.assert_raises_regex(LookupError, "error_text"): + with pytest.raises(LookupError, match="error_text"): try: raise ValueError("This is another error") except ValueError: @@ -189,18 +190,18 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5])) # scalar comparison expected = """Expected type """ - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(1, 2) expected = """expected 2\\.00000 but got 1\\.00000, with decimal 5""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(1, 2) # array / scalar array comparison @@ -210,10 +211,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: ndarray \\[right\\]: int""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): # numpy_array_equal only accepts np.ndarray 
assert_numpy_array_equal(np.array([1]), 1) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1]), 1) # scalar / array comparison @@ -223,9 +224,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: int \\[right\\]: ndarray""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(1, np.array([1])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(1, np.array([1])) expected = """numpy array are different @@ -234,10 +235,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) @@ -247,9 +248,9 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([1, 3])) expected = """numpy array are different @@ -258,7 +259,7 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal( np.array([1.1, 2.000001]), np.array([1.1, 2.0])) @@ -271,10 
+272,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) @@ -284,10 +285,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) @@ -298,10 +299,10 @@ def test_numpy_array_equal_message(self): \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') @@ -316,10 +317,10 @@ def test_numpy_array_equal_unicode_message(self): \\[left\\]: \\[á, à, ä\\] \\[right\\]: \\[á, à, å\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(np.array([u'á', u'à', u'ä']), np.array([u'á', u'à', u'å'])) - with tm.assert_raises_regex(AssertionError, expected): + with 
pytest.raises(AssertionError, match=expected): assert_almost_equal(np.array([u'á', u'à', u'ä']), np.array([u'á', u'à', u'å'])) @@ -335,9 +336,9 @@ def test_numpy_array_equal_object_message(self): \\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] \\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, b) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal(a, b) def test_numpy_array_equal_copy_flag(self): @@ -345,10 +346,10 @@ def test_numpy_array_equal_copy_flag(self): b = a.copy() c = a.view() expected = r'array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)' - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, b, check_same='same') expected = r'array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)' - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_numpy_array_equal(a, c, check_same='copy') def test_assert_almost_equal_iterable_message(self): @@ -359,7 +360,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: 2 \\[right\\]: 3""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal([1, 2], [3, 4, 5]) expected = """Iterable are different @@ -368,7 +369,7 @@ def test_assert_almost_equal_iterable_message(self): \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_almost_equal([1, 2], [1, 3]) @@ -386,7 +387,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, 
expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=False) expected = """MultiIndex level \\[1\\] are different @@ -399,9 +400,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -412,9 +413,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) expected = """Index are different @@ -425,9 +426,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3.0]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=True) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, exact=True, check_exact=False) expected = """Index are different @@ -438,7 +439,7 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0000000001]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) # must success @@ -452,9 +453,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0001]) - with 
tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) # must success assert_index_equal(idx1, idx2, check_exact=False, @@ -468,9 +469,9 @@ def test_index_equal_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_less_precise=True) expected = """MultiIndex level \\[1\\] are different @@ -483,9 +484,9 @@ def test_index_equal_message(self): ('B', 3), ('B', 4)]) idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2, check_exact=False) def test_index_equal_metadata_message(self): @@ -498,7 +499,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) # same name, should pass @@ -515,7 +516,7 @@ def test_index_equal_metadata_message(self): idx1 = pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(idx1, idx2) def test_categorical_index_equality(self): @@ -526,7 +527,7 @@ def 
test_categorical_index_equality(self): \\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ ordered=False\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_index_equal(pd.Index(pd.Categorical(['a', 'b'])), pd.Index(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']))) @@ -619,7 +620,7 @@ def test_series_equal_message(self): \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) expected = """Series are different @@ -628,9 +629,9 @@ def test_series_equal_message(self): \\[left\\]: \\[1, 2, 3\\] \\[right\\]: \\[1, 2, 4\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), check_less_precise=True) @@ -642,7 +643,7 @@ def test_categorical_series_equality(self): \\[right\\]: CategoricalDtype\\(categories=\\[u?'a', u?'b', u?'c'\\], \ ordered=False\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_series_equal(pd.Series(pd.Categorical(['a', 'b'])), pd.Series(pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']))) @@ -709,7 +710,7 @@ def test_frame_equal_message(self): \\[left\\]: \\(3, 2\\) \\[right\\]: \\(3, 1\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3]})) @@ -719,7 +720,7 @@ def 
test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, @@ -731,7 +732,7 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]}, @@ -743,11 +744,11 @@ def test_frame_equal_message(self): \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}), by_blocks=True) @@ -763,13 +764,13 @@ def test_frame_equal_message_unicode(self): \\[left\\]: \\[é, è, ë\\] \\[right\\]: \\[é, è, e̊\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'e̊']})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', 
u'ë']}), pd.DataFrame({'A': [u'á', u'à', u'ä'], @@ -782,13 +783,13 @@ def test_frame_equal_message_unicode(self): \\[left\\]: \\[á, à, ä\\] \\[right\\]: \\[a, a, a\\]""" - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': ['a', 'a', 'a'], 'E': ['e', 'e', 'e']})) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): assert_frame_equal(pd.DataFrame({'A': [u'á', u'à', u'ä'], 'E': [u'é', u'è', u'ë']}), pd.DataFrame({'A': ['a', 'a', 'a'], @@ -808,7 +809,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 5]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) expected = """Categorical\\.codes are different @@ -819,7 +820,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 4, 3], categories=[1, 2, 3, 4]) b = pd.Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) expected = """Categorical are different @@ -830,7 +831,7 @@ def test_categorical_equal_message(self): a = pd.Categorical([1, 2, 3, 4], ordered=False) b = pd.Categorical([1, 2, 3, 4], ordered=True) - with tm.assert_raises_regex(AssertionError, expected): + with pytest.raises(AssertionError, match=expected): tm.assert_categorical_equal(a, b) @@ -845,7 +846,7 @@ def test_interval_array_equal_message(self): IntervalArray.left values are different \\(100.0 %\\) \\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""") - with tm.assert_raises_regex(AssertionError, msg): + with pytest.raises(AssertionError, match=msg): 
tm.assert_interval_array_equal(a, b) @@ -883,3 +884,13 @@ def test_create_temp_directory(): assert os.path.exists(path) assert os.path.isdir(path) assert not os.path.exists(path) + + +def test_assert_raises_regex_deprecated(): + # see gh-23592 + + with tm.assert_produces_warning(FutureWarning): + msg = "Not equal!" + + with tm.assert_raises_regex(AssertionError, msg): + assert 1 == 2, msg diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 032ee5eb22aaa..a6cb54ee43909 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -108,7 +108,7 @@ class TestValidateArgs(object): def test_bad_min_fname_arg_count(self): msg = "'max_fname_arg_count' must be non-negative" - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_args(self.fname, (None,), -1, 'foo') def test_bad_arg_length_max_value_single(self): @@ -123,7 +123,7 @@ def test_bad_arg_length_max_value_single(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -140,7 +140,7 @@ def test_bad_arg_length_max_value_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args(self.fname, args, min_fname_arg_count, compat_args) @@ -159,7 +159,7 @@ def test_not_all_defaults(self): arg_vals = (1, -1, 3) for i in range(1, 3): - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_args(self.fname, arg_vals[:i], 2, compat_args) def test_validation(self): @@ -188,7 +188,7 @@ def test_bad_kwarg(self): r"keyword argument '{arg}'".format( fname=self.fname, arg=badarg)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, 
match=msg): validate_kwargs(self.fname, kwargs, compat_args) def test_not_all_none(self): @@ -209,7 +209,7 @@ def test_not_all_none(self): kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): validate_kwargs(self.fname, kwargs, compat_args) def test_validation(self): @@ -228,11 +228,11 @@ def test_validate_bool_kwarg(self): for name in arg_names: for value in invalid_values: - with tm.assert_raises_regex(ValueError, - "For argument \"%s\" " - "expected type bool, " - "received type %s" % - (name, type(value).__name__)): + msg = ("For argument \"%s\" " + "expected type bool, " + "received type %s" % + (name, type(value).__name__)) + with pytest.raises(ValueError, match=msg): validate_bool_kwarg(value, name) for value in valid_values: @@ -255,7 +255,7 @@ def test_invalid_total_length_max_length_one(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -273,7 +273,7 @@ def test_invalid_total_length_max_length_multiple(self): .format(fname=self.fname, max_length=max_length, actual_length=actual_length)) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -292,17 +292,15 @@ def test_no_args_with_kwargs(self): args = () kwargs = {'foo': -5, bad_arg: 2} - tm.assert_raises_regex(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - min_fname_arg_count, compat_args) + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, compat_args) args = (-5, 2) kwargs = {} - tm.assert_raises_regex(ValueError, msg, - validate_args_and_kwargs, - self.fname, args, kwargs, - 
min_fname_arg_count, compat_args) + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(self.fname, args, kwargs, + min_fname_arg_count, compat_args) def test_duplicate_argument(self): min_fname_arg_count = 2 @@ -316,7 +314,7 @@ def test_duplicate_argument(self): msg = (r"{fname}\(\) got multiple values for keyword " r"argument '{arg}'".format(fname=self.fname, arg='foo')) - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): validate_args_and_kwargs(self.fname, args, kwargs, min_fname_arg_count, compat_args) @@ -343,7 +341,7 @@ def test_cannot_create_instance_of_stolenbuffer(self): ``move_into_mutable_buffer`` which has a bunch of checks in it. """ msg = "cannot create 'pandas.util._move.stolenbuf' instances" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): stolenbuf() def test_more_than_one_ref(self): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c6457545038e0..fd7012c87040f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2541,6 +2541,9 @@ def assert_raises_regex(_exception, _regexp, _callable=None, for use by `re.search()`. This is a port of the `assertRaisesRegexp` function from unittest in Python 2.7. + .. deprecated:: 0.24.0 + Use `pytest.raises` instead. + Examples -------- >>> assert_raises_regex(ValueError, 'invalid literal for.*XYZ', int, 'XYZ') @@ -2570,6 +2573,10 @@ def assert_raises_regex(_exception, _regexp, _callable=None, AssertionError: "banana" does not match "'str' object does not support \ item assignment" """ + warnings.warn(("assert_raises_regex has been deprecated and will " + "be removed in the next release. 
Please use " + "`pytest.raises` instead."), FutureWarning, stacklevel=2) + manager = _AssertRaisesContextmanager(exception=_exception, regexp=_regexp) if _callable is not None: with manager: From 274ef6ff9eeb6e0b3dc2c42e494d5c20ad454108 Mon Sep 17 00:00:00 2001 From: Markus Meier Date: Sun, 11 Nov 2018 02:05:54 +0100 Subject: [PATCH 03/11] DOC: Remove incorrect periods at the end of parameter types (#23600) --- pandas/core/dtypes/inference.py | 22 +++++++++++----------- pandas/core/generic.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/series.py | 2 +- pandas/core/strings.py | 4 ++-- pandas/io/clipboards.py | 2 +- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index d56bd83f01236..5f35a040d7d47 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -73,7 +73,7 @@ def is_string_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Examples -------- @@ -127,7 +127,7 @@ def is_iterator(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -172,7 +172,7 @@ def is_file_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -203,7 +203,7 @@ def is_re(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -227,7 +227,7 @@ def is_re_compilable(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -261,7 +261,7 @@ def is_list_like(obj, allow_sets=True): Parameters ---------- - obj : The object to check. + obj : The object to check allow_sets : boolean, default True If this parameter is False, sets will not be considered list-like @@ -310,7 +310,7 @@ def is_array_like(obj): Parameters ---------- - obj : The object to check. 
+ obj : The object to check Returns ------- @@ -343,7 +343,7 @@ def is_nested_list_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -384,7 +384,7 @@ def is_dict_like(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -408,7 +408,7 @@ def is_named_tuple(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- @@ -468,7 +468,7 @@ def is_sequence(obj): Parameters ---------- - obj : The object to check. + obj : The object to check Returns ------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cfdc6b34274bf..b7ead5a098880 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5248,11 +5248,11 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): the same type. Alternatively, use {col: dtype, ...}, where col is a column label and dtype is a numpy.dtype or Python type to cast one or more of the DataFrame's columns to column-specific types. - copy : bool, default True. + copy : bool, default True Return a copy when ``copy=True`` (be very careful setting ``copy=False`` as changes to values then may propagate to other pandas objects). - errors : {'raise', 'ignore'}, default 'raise'. + errors : {'raise', 'ignore'}, default 'raise' Control raising of exceptions on invalid data for provided dtype. - ``raise`` : allow exceptions to be raised diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 56ab9b6c020c0..8da0672559006 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -501,7 +501,7 @@ def to_series(self, keep_tz=False, index=None, name=None): Parameters ---------- - keep_tz : optional, defaults False. + keep_tz : optional, defaults False return the data keeping the timezone. 
If keep_tz is True: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9c981c24190a4..01304cce507f0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1885,7 +1885,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): ascending : boolean, default True False to sort in descending order Can also be a list to specify a directed ordering - sort_remaining : sort by the remaining levels after level. + sort_remaining : sort by the remaining levels after level Returns ------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d1b5645928921..e4c177a08462e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -38,7 +38,7 @@ class RangeIndex(Int64Index): Parameters ---------- - start : int (default: 0), or other RangeIndex instance. + start : int (default: 0), or other RangeIndex instance If int and "stop" is not given, interpreted as "stop" instead. stop : int (default: 0) step : int (default: 1) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6971b0b0c78e0..b9f4b848b2ed7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3025,7 +3025,7 @@ def reorder_levels(self, order): Parameters ---------- - order : list of int representing new level order. + order : list of int representing new level order (reference level by number or key) Returns diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bf0c93437f4dc..a12605aaed554 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -659,7 +659,7 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan): If True, case sensitive flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE - na : default NaN, fill value for missing values. + na : default NaN, fill value for missing values Returns ------- @@ -2665,7 +2665,7 @@ def encode(self, encoding, errors="strict"): Parameters ---------- - to_strip : str or None, default None. 
+ to_strip : str or None, default None Specifying the set of characters to be removed. All combinations of this set of characters will be stripped. If None then whitespaces are removed. diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index c6108f30a560a..23a2b04214e4e 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -16,7 +16,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover Parameters ---------- - sep : str, default '\s+'. + sep : str, default '\s+' A string or regex delimiter. The default of '\s+' denotes one or more whitespace characters. From 2cea659a9a6a88d16e0d1fa698fb61dc6c3ce3d5 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 11 Nov 2018 01:28:18 +0000 Subject: [PATCH 04/11] DOC: Fixes to docstring to add validation to CI (#23560) --- pandas/core/frame.py | 38 +++++++++++++++++++++----------------- pandas/core/generic.py | 3 +-- pandas/core/panel.py | 16 ++++++++-------- pandas/core/series.py | 1 - 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b24f79e89902a..6b29725ba2bea 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -864,12 +864,17 @@ def iterrows(self): data types, the iterator returns a copy and not a view, and writing to it will have no effect. - Returns - ------- + Yields + ------ + index : label or tuple of label + The index of the row. A tuple for a `MultiIndex`. + data : Series + The data of the row as a Series. + it : generator A generator that iterates over the rows of the frame. - See also + See Also -------- itertuples : Iterate over DataFrame rows as namedtuples of the values. iteritems : Iterate over (column name, Series) pairs. @@ -3951,6 +3956,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False, necessary. 
Setting to False will improve the performance of this method + Returns + ------- + DataFrame + Examples -------- >>> df = pd.DataFrame({'month': [1, 4, 7, 10], @@ -3991,10 +4000,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False, 2 2014 4 40 3 2013 7 84 4 2014 10 31 - - Returns - ------- - dataframe : DataFrame """ inplace = validate_bool_kwarg(inplace, 'inplace') if not isinstance(keys, list): @@ -6694,6 +6699,15 @@ def round(self, decimals=0, *args, **kwargs): of `decimals` which are not columns of the input will be ignored. + Returns + ------- + DataFrame + + See Also + -------- + numpy.around + Series.round + Examples -------- >>> df = pd.DataFrame(np.random.random([3, 3]), @@ -6719,15 +6733,6 @@ def round(self, decimals=0, *args, **kwargs): first 0.0 1 0.17 second 0.0 1 0.58 third 0.9 0 0.49 - - Returns - ------- - DataFrame object - - See Also - -------- - numpy.around - Series.round """ from pandas.core.reshape.concat import concat @@ -6793,7 +6798,6 @@ def corr(self, method='pearson', min_periods=1): Examples -------- - >>> import numpy as np >>> histogram_intersection = lambda a, b: np.minimum(a, b ... ).sum().round(decimals=1) >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b7ead5a098880..34f25c5634d5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9691,8 +9691,7 @@ def nanptp(values, axis=0, skipna=True): cls.ptp = _make_stat_function( cls, 'ptp', name, name2, axis_descr, - """ - Returns the difference between the maximum value and the + """Returns the difference between the maximum value and the minimum value in the object. This is the equivalent of the ``numpy.ndarray`` method ``ptp``. 
diff --git a/pandas/core/panel.py b/pandas/core/panel.py index eb841e6398976..c878d16fac2e9 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -106,14 +106,14 @@ def panel_index(time, panels, names=None): class Panel(NDFrame): """ - Represents wide format panel data, stored as 3-dimensional array - - .. deprecated:: 0.20.0 - The recommended way to represent 3-D data are with a MultiIndex on a - DataFrame via the :attr:`~Panel.to_frame()` method or with the - `xarray package `__. - Pandas provides a :attr:`~Panel.to_xarray()` method to automate this - conversion. + Represents wide format panel data, stored as 3-dimensional array. + + .. deprecated:: 0.20.0 + The recommended way to represent 3-D data are with a MultiIndex on a + DataFrame via the :attr:`~Panel.to_frame()` method or with the + `xarray package `__. + Pandas provides a :attr:`~Panel.to_xarray()` method to automate this + conversion. Parameters ---------- diff --git a/pandas/core/series.py b/pandas/core/series.py index b9f4b848b2ed7..20e4720a3bde7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1913,7 +1913,6 @@ def corr(self, other, method='pearson', min_periods=None): Examples -------- - >>> import numpy as np >>> histogram_intersection = lambda a, b: np.minimum(a, b ... ).sum().round(decimals=1) >>> s1 = pd.Series([.2, .0, .6, .2]) From 3230468980be14f4a75a3b2e99946914e9d53133 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 08:24:42 -0600 Subject: [PATCH 05/11] TST: Fix integer ops comparison test (#23619) The `op(Series[integer], other)` path was being tested twice. The `op(IntegerArray, other)` path was not being tested. 
Closes https://github.com/pandas-dev/pandas/issues/22096 --- pandas/tests/arrays/test_integer.py | 10 ++++------ setup.cfg | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 10f54458e4980..51cd139a6ccad 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -314,11 +314,11 @@ def test_rpow_one_to_na(self): class TestComparisonOps(BaseOpsUtil): - def _compare_other(self, s, data, op_name, other): + def _compare_other(self, data, op_name, other): op = self.get_op_from_name(op_name) # array - result = op(s, other) + result = pd.Series(op(data, other)) expected = pd.Series(op(data._data, other)) # fill the nan locations @@ -340,14 +340,12 @@ def _compare_other(self, s, data, op_name, other): def test_compare_scalar(self, data, all_compare_operators): op_name = all_compare_operators - s = pd.Series(data) - self._compare_other(s, data, op_name, 0) + self._compare_other(data, op_name, 0) def test_compare_array(self, data, all_compare_operators): op_name = all_compare_operators - s = pd.Series(data) other = pd.Series([0] * len(data)) - self._compare_other(s, data, op_name, other) + self._compare_other(data, op_name, other) class TestCasting(object): diff --git a/setup.cfg b/setup.cfg index 4726a0ddb2fb2..2e07182196d5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,7 +90,7 @@ known_post_core=pandas.tseries,pandas.io,pandas.plotting sections=FUTURE,STDLIB,THIRDPARTY,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER known_first_party=pandas -known_third_party=Cython,numpy,python-dateutil,pytz,pyarrow +known_third_party=Cython,numpy,python-dateutil,pytz,pyarrow,pytest multi_line_output=4 force_grid_wrap=0 combine_as_imports=True From 602f6ff4a8451cb3e45f80e42ae8c56d44268893 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 08:48:37 -0600 Subject: [PATCH 06/11] TST: Unskip some Categorical Tests (#23613) --- 
pandas/tests/extension/test_categorical.py | 95 +++++++++------------- 1 file changed, 40 insertions(+), 55 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 7fd389e19325c..279bfb5dc8eab 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -72,10 +72,10 @@ class TestDtype(base.BaseDtypeTests): class TestInterface(base.BaseInterfaceTests): - @pytest.mark.skip(reason="Memory usage doesn't match") - def test_memory_usage(self): + @pytest.mark.skip(reason="Memory usage doesn't match", strict=True) + def test_memory_usage(self, data): # Is this deliberate? - pass + super(TestInterface, self).test_memory_usage(data) class TestConstructors(base.BaseConstructorsTests): @@ -83,69 +83,56 @@ class TestConstructors(base.BaseConstructorsTests): class TestReshaping(base.BaseReshapingTests): - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_concat_columns(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_align(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_align_frame(self, data, na_value): - pass - - @pytest.mark.skip(reason="Unobserved categories preseved in concat.") - def test_merge(self, data, na_value): - pass + pass class TestGetitem(base.BaseGetitemTests): - skip_take = pytest.mark.skip(reason="GH-20664.") + skip_take = pytest.mark.skip(reason="GH-20664.", strict=True) - @pytest.mark.skip(reason="Backwards compatibility") - def test_getitem_scalar(self): + @pytest.mark.skip(reason="Backwards compatibility", strict=True) + def test_getitem_scalar(self, data): # CategoricalDtype.type isn't "correct" since it should # be a parent of the elements (object). But don't want # to break things by changing. 
- pass + super(TestGetitem, self).test_getitem_scalar(data) @skip_take - def test_take(self): + def test_take(self, data, na_value, na_cmp): # TODO remove this once Categorical.take is fixed - pass + super(TestGetitem, self).test_take(data, na_value, na_cmp) @skip_take - def test_take_negative(self): - pass + def test_take_negative(self, data): + super().test_take_negative(data) @skip_take - def test_take_pandas_style_negative_raises(self): - pass + def test_take_pandas_style_negative_raises(self, data, na_value): + super().test_take_pandas_style_negative_raises(data, na_value) @skip_take - def test_take_non_na_fill_value(self): - pass + def test_take_non_na_fill_value(self, data_missing): + super().test_take_non_na_fill_value(data_missing) @skip_take - def test_take_out_of_bounds_raises(self): - pass + def test_take_out_of_bounds_raises(self, data, allow_fill): + return super().test_take_out_of_bounds_raises(data, allow_fill) - @pytest.mark.skip(reason="GH-20747. Unobserved categories.") - def test_take_series(self): - pass + @pytest.mark.skip(reason="GH-20747. 
Unobserved categories.", strict=True) + def test_take_series(self, data): + super().test_take_series(data) @skip_take - def test_reindex_non_na_fill_value(self): - pass + def test_reindex_non_na_fill_value(self, data_missing): + super().test_reindex_non_na_fill_value(data_missing) - @pytest.mark.skip(reason="Categorical.take buggy") - def test_take_empty(self): - pass + @pytest.mark.skip(reason="Categorical.take buggy", strict=True) + def test_take_empty(self, data, na_value, na_cmp): + super().test_take_empty(data, na_value, na_cmp) - @pytest.mark.skip(reason="test not written correctly for categorical") - def test_reindex(self): - pass + @pytest.mark.skip(reason="test not written correctly for categorical", + strict=True) + def test_reindex(self, data, na_value): + super().test_reindex(data, na_value) class TestSetitem(base.BaseSetitemTests): @@ -154,13 +141,13 @@ class TestSetitem(base.BaseSetitemTests): class TestMissing(base.BaseMissingTests): - @pytest.mark.skip(reason="Not implemented") - def test_fillna_limit_pad(self): - pass + @pytest.mark.skip(reason="Not implemented", strict=True) + def test_fillna_limit_pad(self, data_missing): + super().test_fillna_limit_pad(data_missing) - @pytest.mark.skip(reason="Not implemented") - def test_fillna_limit_backfill(self): - pass + @pytest.mark.skip(reason="Not implemented", strict=True) + def test_fillna_limit_backfill(self, data_missing): + super().test_fillna_limit_backfill(data_missing) class TestReduce(base.BaseNoReduceTests): @@ -168,11 +155,9 @@ class TestReduce(base.BaseNoReduceTests): class TestMethods(base.BaseMethodsTests): - pass - - @pytest.mark.skip(reason="Unobserved categories included") + @pytest.mark.skip(reason="Unobserved categories included", strict=True) def test_value_counts(self, all_data, dropna): - pass + return super().test_value_counts(all_data, dropna) def test_combine_add(self, data_repeated): # GH 20825 @@ -190,9 +175,9 @@ def test_combine_add(self, data_repeated): expected = 
pd.Series([a + val for a in list(orig_data1)]) self.assert_series_equal(result, expected) - @pytest.mark.skip(reason="Not Applicable") + @pytest.mark.skip(reason="Not Applicable", strict=True) def test_fillna_length_mismatch(self, data_missing): - pass + super().test_fillna_length_mismatch(data_missing) class TestCasting(base.BaseCastingTests): From 00ca0f9cafa8528c8835d920ff24c71104690def Mon Sep 17 00:00:00 2001 From: Thein Oo Date: Sun, 11 Nov 2018 09:50:26 -0500 Subject: [PATCH 07/11] DOC: Fix Order of parameters in docstrings (#23611) --- pandas/core/reshape/pivot.py | 4 ++-- pandas/core/window.py | 35 +++++++++++++++++------------------ pandas/io/json/normalize.py | 2 +- pandas/tseries/offsets.py | 4 ++-- pandas/util/testing.py | 8 ++++---- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index ec4cdffc56435..d12dbb81765d8 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -407,12 +407,12 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, values : array-like, optional Array of values to aggregate according to the factors. Requires `aggfunc` be specified. 
- aggfunc : function, optional - If specified, requires `values` be specified as well rownames : sequence, default None If passed, must match number of row arrays passed colnames : sequence, default None If passed, must match number of column arrays passed + aggfunc : function, optional + If specified, requires `values` be specified as well margins : boolean, default False Add row/column margins (subtotals) margins_name : string, default 'All' diff --git a/pandas/core/window.py b/pandas/core/window.py index 5256532a31870..be28a3bcccec6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -136,7 +136,7 @@ def _gotitem(self, key, ndim, subset=None): Parameters ---------- - key : string / list of selections + key : str / list of selections ndim : 1,2 requested ndim of result subset : object, default None @@ -464,15 +464,16 @@ class Window(_Window): (otherwise result is NA). For a window that is specified by an offset, `min_periods` will default to 1. Otherwise, `min_periods` will default to the size of the window. - center : boolean, default False + center : bool, default False Set the labels at the center of the window. - win_type : string, default None + win_type : str, default None Provide a window type. If ``None``, all points are evenly weighted. See the notes below for further information. - on : string, optional + on : str, optional For a DataFrame, column on which to calculate the rolling window, rather than the index - closed : string, default None + axis : int or str, default 0 + closed : str, default None Make the interval closed on the 'right', 'left', 'both' or 'neither' endpoints. For offset-based windows, it defaults to 'right'. @@ -481,8 +482,6 @@ class Window(_Window): .. 
versionadded:: 0.20.0 - axis : int or string, default 0 - Returns ------- a Window or Rolling sub-classed for the particular operation @@ -661,7 +660,7 @@ def _apply_window(self, mean=True, **kwargs): Parameters ---------- - mean : boolean, default True + mean : bool, default True If True computes weighted mean, else weighted sum Returns @@ -819,11 +818,11 @@ def _apply(self, func, name=None, window=None, center=None, Parameters ---------- - func : string/callable to apply - name : string, optional + func : str/callable to apply + name : str, optional name of this function window : int/array, default to _get_window() - center : boolean, default to self.center + center : bool, default to self.center check_minp : function, default to _use_window Returns @@ -1816,9 +1815,9 @@ class Expanding(_Rolling_and_Expanding): min_periods : int, default 1 Minimum number of observations in window required to have a value (otherwise result is NA). - center : boolean, default False + center : bool, default False Set the labels at the center of the window. - axis : int or string, default 0 + axis : int or str, default 0 Returns ------- @@ -2062,7 +2061,7 @@ def _constructor(self): Parameters ---------- -bias : boolean, default False +bias : bool, default False Use a standard estimation bias correction """ @@ -2079,7 +2078,7 @@ def _constructor(self): will be a MultiIndex DataFrame in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used. -bias : boolean, default False +bias : bool, default False Use a standard estimation bias correction """ @@ -2110,10 +2109,10 @@ class EWM(_Rolling): min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). 
- adjust : boolean, default True + adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average) - ignore_na : boolean, default False + ignore_na : bool, default False Ignore missing values when calculating weights; specify True to reproduce pre-0.15.0 behavior @@ -2242,7 +2241,7 @@ def _apply(self, func, **kwargs): Parameters ---------- - func : string/callable to apply + func : str/callable to apply Returns ------- diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index ce07a795017e5..af046d9f309e7 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -110,10 +110,10 @@ def json_normalize(data, record_path=None, meta=None, assumed to be an array of records meta : list of paths (string or list of strings), default None Fields to use as metadata for each record in resulting table + meta_prefix : string, default None record_prefix : string, default None If True, prefix records with dotted (?) path, e.g. 
foo.bar.field if path to records is ['foo', 'bar'] - meta_prefix : string, default None errors : {'raise', 'ignore'}, default 'raise' * 'ignore' : will ignore KeyError if keys listed in meta are not diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6fb562e301ac2..53719b71d1180 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -807,7 +807,6 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): Parameters ---------- n : int, default 1 - offset : timedelta, default timedelta(0) normalize : bool, default False Normalize start/end dates to midnight before generating date range weekmask : str, Default 'Mon Tue Wed Thu Fri' @@ -816,6 +815,7 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) """ _prefix = 'C' _attributes = frozenset(['n', 'normalize', @@ -958,7 +958,6 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): Parameters ---------- n : int, default 1 - offset : timedelta, default timedelta(0) normalize : bool, default False Normalize start/end dates to midnight before generating date range weekmask : str, Default 'Mon Tue Wed Thu Fri' @@ -967,6 +966,7 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) """ _attributes = frozenset(['n', 'normalize', 'weekmask', 'holidays', 'calendar', 'offset']) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index fd7012c87040f..748f3bbc5b497 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1242,18 +1242,18 @@ def assert_series_equal(left, right, check_dtype=True, check_less_precise : bool or int, default False Specify 
comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare - check_exact : bool, default False - Whether to compare number exactly. + If int, then specify the digits to compare. check_names : bool, default True Whether to check the Series and Index names attribute. + check_exact : bool, default False + Whether to compare number exactly. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. obj : str, default 'Series' Specify object name being compared, internally used to show appropriate - assertion message + assertion message. """ __tracebackhide__ = True From 4c63f3e0672be8345ad2a152f5ba506eefde8312 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Nov 2018 07:08:14 -0800 Subject: [PATCH 08/11] CLN: use float64_t consistently instead of double, double_t (#23583) --- pandas/_libs/algos.pxd | 3 - pandas/_libs/algos.pyx | 18 +- pandas/_libs/algos_common_helper.pxi.in | 4 +- pandas/_libs/algos_rank_helper.pxi.in | 10 +- pandas/_libs/algos_take_helper.pxi.in | 4 +- pandas/_libs/groupby.pyx | 34 ++- pandas/_libs/groupby_helper.pxi.in | 22 +- pandas/_libs/hashtable.pyx | 8 +- pandas/_libs/hashtable_class_helper.pxi.in | 4 +- pandas/_libs/index.pyx | 6 +- pandas/_libs/index_class_helper.pxi.in | 4 +- pandas/_libs/interval.pyx | 15 +- pandas/_libs/intervaltree.pxi.in | 25 +-- pandas/_libs/join.pyx | 6 +- pandas/_libs/lib.pyx | 26 ++- pandas/_libs/missing.pyx | 21 +- pandas/_libs/parsers.pyx | 36 ++-- pandas/_libs/sparse.pyx | 67 ------ pandas/_libs/sparse_op_helper.pxi.in | 4 +- pandas/_libs/tslib.pyx | 16 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/_libs/window.pyx | 196 ++++++++++-------- pandas/tests/arrays/sparse/test_array.py | 4 +- pandas/tests/frame/test_operators.py | 3 +- 
pandas/tests/frame/test_repr_info.py | 5 +- pandas/tests/frame/test_timeseries.py | 7 +- pandas/tests/frame/test_to_csv.py | 3 +- pandas/tests/groupby/aggregate/test_cython.py | 5 +- pandas/tests/series/test_operators.py | 9 +- 29 files changed, 251 insertions(+), 316 deletions(-) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index 0888cf3c85f2f..5df1e381ea3ce 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,9 +1,6 @@ from util cimport numeric -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil - - cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: cdef: numeric t diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 075e2c5129579..e77899507833f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -15,8 +15,7 @@ from numpy cimport (ndarray, NPY_FLOAT32, NPY_FLOAT64, NPY_OBJECT, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, - double_t) + uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -32,10 +31,9 @@ import missing cdef float64_t FP_ERR = 1e-13 -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() tiebreakers = { 'average': TIEBREAK_AVERAGE, @@ -199,7 +197,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: +def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: cdef: Py_ssize_t i, j, l, m, n = a.shape[0] numeric x @@ -812,7 +810,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (timelike and arr[0] == iNaT): + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): # single value is NaN return False, False, True else: @@ -820,7 +818,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): elif n < 2: return True, 
True, True - if timelike and arr[0] == iNaT: + if timelike and arr[0] == NPY_NAT: return False, False, True if algos_t is not object: @@ -828,7 +826,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break @@ -853,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index c2b0a4119e6e5..3708deb1a4b76 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # ensure_dtype -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index fcb052e8be63b..4d144dcf2808a 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT - # create copy in case of iNaT + # create copy in case of NPY_NAT # values are mutated inplace if mask.any(): values = values.copy() @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{if dtype != 
'uint64'}} isnan = sorted_mask[i] if isnan and keep_na: - ranks[argsorted[i]] = nan + ranks[argsorted[i]] = NaN continue {{endif}} @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT {{endif}} np.putmask(values, mask, nan_value) @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{else}} if (val == nan_value) and keep_na: {{endif}} - ranks[i, argsorted[i, j]] = nan + ranks[i, argsorted[i, j]] = NaN {{if dtype == 'object'}} infs += 1 diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index bd5feef1ff2b0..2fea8b17fd9d7 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # take_1d, take_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 83ded64b742ed..7c16b29f3e42b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,14 +1,13 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport (ndarray, - double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, 
TIEBREAK_DENSE) from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: @@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result -# TODO: Is this redundant with algos.kth_smallest? +# TODO: Is this redundant with algos.kth_smallest cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: cdef: Py_ssize_t i, j, l, m - double_t x, t + float64_t x, t l = 0 m = n - 1 @@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts - ndarray data + ndarray[float64_t, ndim=2] data float64_t* ptr assert min_count == -1, "'min_count' only used in add and prod" @@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:, :] out, - float64_t[:, :] values, - int64_t[:] labels, + const float64_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike, bint skipna=True): """ @@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out, @cython.wraparound(False) def group_cumsum(numeric[:, :] out, numeric[:, :] values, - int64_t[:] labels, + const int64_t[:] labels, is_datetimelike, bint skipna=True): """ @@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, +def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, int ngroups, int periods): cdef: Py_ssize_t N, i, j, ii @@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, """ cdef: Py_ssize_t i, N - ndarray[int64_t] sorted_labels + int64_t[:] sorted_labels int64_t 
idx, curr_fill_idx=-1, filled_vals=0 N = len(out) @@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, @cython.boundscheck(False) @cython.wraparound(False) -def group_any_all(ndarray[uint8_t] out, - ndarray[int64_t] labels, - ndarray[uint8_t] values, - ndarray[uint8_t] mask, +def group_any_all(uint8_t[:] out, + const int64_t[:] labels, + const uint8_t[:] values, + const uint8_t[:] mask, object val_test, bint skipna): """Aggregated boolean values to show truthfulness of group elements diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 484a4b069305f..523d43f893aad 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max # ---------------------------------------------------------------------- @@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_nth, group_last, group_rank -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: # name, c_type, nan_val dtypes = [('float64', 'float64_t', 'NAN'), ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'iNaT'), + ('int64', 'int64_t', 'NPY_NAT'), ('object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # to the result where appropriate if keep_na and mask[_as[i]]: for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = nan + out[_as[j], 0] = NaN grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): @@ -630,7 
+630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, if groupby_t is int64_t: # Note: evaluated at compile-time maxx[:] = -_int64_max - nan_val = iNaT + nan_val = NPY_NAT else: maxx[:] = -np.inf nan_val = NAN @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: minx[:] = _int64_max - nan_val = iNaT + nan_val = NPY_NAT else: minx[:] = np.inf nan_val = NAN @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, # val = nan if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val < mval: @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, val = values[i, j] if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val > mval: diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index d38b72ccebbb2..9aa887727a765 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint32_t +from numpy cimport ndarray, uint8_t, uint32_t, float64_t cnp.import_array() cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" from khash cimport ( @@ -42,9 +42,7 @@ cimport util from missing cimport checknull -nan = np.nan - -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 36ed8a88aa78b..a71023ed34f44 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -251,9 +251,9 @@ cdef class HashTable: {{py: # name, dtype, float_group, default_na_value -dtypes 
= [('Float64', 'float64', True, 'nan'), +dtypes = [('Float64', 'float64', True, 'np.nan'), ('UInt64', 'uint64', False, 0), - ('Int64', 'int64', False, 'iNaT')] + ('Int64', 'int64', False, 'NPY_NAT')] }} diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d418ac63a4ac8..7930f583274b5 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib from pandas._libs.missing import checknull -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() cdef inline bint is_definitely_invalid_key(object val): @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timestamp(value).value raise ValueError("cannot set a Timestamp with a non-timestamp") @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, timedelta): return Timedelta(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timedelta(value).value raise ValueError("cannot set a Timedelta with a non-timedelta") diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index c19812efaaa35..ff95917f6643a 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -4,9 +4,9 @@ Template for functions of IndexEngine subclasses. 
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IndexEngine Subclass Methods -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a395fdbabeca2..dae88d3b707bf 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,20 +1,27 @@ # -*- coding: utf-8 -*- import numbers +from operator import le, lt from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, PyObject_RichCompare) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np -from numpy cimport ndarray +cimport numpy as cnp +from numpy cimport ( + int64_t, int32_t, float64_t, float32_t, uint64_t, + ndarray, + PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) +cnp.import_array() -from operator import le, lt cimport util util.import_array() +from hashtable cimport Int64Vector, Int64VectorData + from tslibs import Timestamp from tslibs.timezones cimport tz_compare diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index f9427fbbcd900..aa53f5086b894 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -4,21 +4,6 @@ Template for intervaltree WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from numpy cimport ( - int64_t, int32_t, float64_t, float32_t, uint64_t, - ndarray, - PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) -import numpy as np - -cimport cython -from cython cimport Py_ssize_t - -cimport numpy as cnp -cnp.import_array() - -from hashtable cimport Int64Vector, Int64VectorData - - ctypedef fused scalar_t: float64_t float32_t @@ -26,10 +11,9 @@ ctypedef fused scalar_t: int32_t uint64_t - 
-#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IntervalTree -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef class IntervalTree(IntervalMixin): """A centered interval tree @@ -203,9 +187,10 @@ cdef sort_values_and_indices(all_values, all_indices, subset): sorted_indices = take(indices, sorter) return sorted_values, sorted_indices -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # Nodes -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # we need specialized nodes and leaves to optimize for different dtype and # closed values diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 748f3f265dd34..54dfeeff1452d 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -10,10 +10,6 @@ from numpy cimport (ndarray, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() - -cdef double NaN = np.NaN -cdef double nan = NaN - from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd @@ -673,7 +669,7 @@ ctypedef fused asof_t: int32_t int64_t float - double + float64_t ctypedef fused by_t: object diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9e0fcbc4a826..cfc60256e97a3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,13 +45,14 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": - int floatify(object, double *result, int *maybe_int) except -1 + int floatify(object, float64_t *result, int *maybe_int) except -1 cimport util from util cimport (is_nan, UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) from tslib import array_to_datetime +from 
tslibs.nattype cimport NPY_NAT from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 @@ -67,11 +68,8 @@ cdef object oINT64_MAX = INT64_MAX cdef object oINT64_MIN = INT64_MIN cdef object oUINT64_MAX = UINT64_MAX -cdef int64_t NPY_NAT = util.get_nat() -iNaT = util.get_nat() - cdef bint PY2 = sys.version_info[0] == 2 -cdef double nan = np.NaN +cdef float64_t NaN = np.NaN def values_from_object(obj: object): @@ -104,7 +102,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: # ---------------------------------------------------------------------- -def is_scalar(val: object) -> bint: +def is_scalar(val: object) -> bool: """ Return True if given value is scalar. @@ -628,7 +626,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, nat_count = 0 if hasnans: - mask = values == iNaT + mask = values == NPY_NAT nat_count = np.sum(mask) values = values[~mask] @@ -1816,7 +1814,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if val.__hash__ is not None and val in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif util.is_float_object(val): fval = val if fval != fval: @@ -1847,11 +1845,11 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.bool_ = True elif val is None: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif hasattr(val, '__len__') and len(val) == 0: if convert_empty or seen.coerce_numeric: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: raise ValueError('Empty string encountered') elif util.is_complex_object(val): @@ -1866,7 +1864,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if fval in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: if fval != fval: seen.null_ = True @@ -1899,7 +1897,7 @@ def 
maybe_convert_numeric(ndarray[object] values, set na_values, elif "uint64" in str(e): # Exception from check functions. raise seen.saw_null() - floats[i] = nan + floats[i] = NaN if seen.check_uint64_conflict(): return values @@ -1967,10 +1965,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats[i] = complexes[i] = fnan elif val is NaT: if convert_datetime: - idatetimes[i] = iNaT + idatetimes[i] = NPY_NAT seen.datetime_ = 1 if convert_timedelta: - itimedeltas[i] = iNaT + itimedeltas[i] = NPY_NAT seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b8791359241ad..1fdb04dd10d8e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -5,16 +5,17 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t +from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value +from tslibs.nattype cimport checknull_with_nat from tslibs.nattype import NaT -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() @@ -295,9 +296,7 @@ def isneginf_scalar(val: object) -> bool: cdef inline bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True elif util.is_datetime64_object(v): return v.view('int64') == NPY_NAT @@ -307,9 +306,7 @@ cdef inline bint is_null_datetime64(v): cdef inline bint is_null_timedelta64(v): # determine if we have a null for a timedelta (or integer versions), # excluding np.datetime64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True 
elif util.is_timedelta64_object(v): return v.view('int64') == NPY_NAT @@ -319,8 +316,4 @@ cdef inline bint is_null_timedelta64(v): cdef inline bint is_null_period(v): # determine if we have a null for a Period (or integer versions), # excluding np.datetime64('nat') and np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: - return True - return False + return checknull_with_nat(v) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 391de339ad60e..3870a55c22fd6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -65,8 +65,8 @@ CParserError = ParserError cdef bint PY3 = (sys.version_info[0] >= 3) -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef extern from "errno.h": @@ -182,10 +182,10 @@ cdef extern from "parser/tokenizer.h": int64_t skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - double (*double_converter_nogil)(const char *, char **, - char, char, char, int) nogil - double (*double_converter_withgil)(const char *, char **, - char, char, char, int) + float64_t (*double_converter_nogil)(const char *, char **, + char, char, char, int) nogil + float64_t (*double_converter_withgil)(const char *, char **, + char, char, char, int) # error handling char *warn_msg @@ -233,12 +233,12 @@ cdef extern from "parser/tokenizer.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - double xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double precise_xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double round_trip(const char *p, char **q, char decimal, char sci, + float64_t xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing) nogil + float64_t precise_xstrtod(const char *p, char **q, char 
decimal, char sci, + char tsep, int skip_trailing) nogil + float64_t round_trip(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -1697,8 +1697,8 @@ cdef _try_double(parser_t *parser, int64_t col, coliter_t it const char *word = NULL char *p_end - double *data - double NA = na_values[np.float64] + float64_t *data + float64_t NA = na_values[np.float64] kh_float64_t *na_fset ndarray result khiter_t k @@ -1706,7 +1706,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = result.data + data = result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1717,8 +1717,8 @@ cdef _try_double(parser_t *parser, int64_t col, else: assert parser.double_converter_withgil != NULL error = _try_double_nogil(parser, - parser.double_converter_withgil, col, line_start, line_end, na_filter, na_hashset, use_na_flist, @@ -1730,14 +1730,14 @@ cdef _try_double(parser_t *parser, int64_t col, cdef inline int _try_double_nogil(parser_t *parser, - double (*double_converter)( + float64_t (*double_converter)( const char *, char **, char, char, char, int) nogil, int col, int line_start, int line_end, bint na_filter, kh_str_t *na_hashset, bint use_na_flist, const kh_float64_t *na_flist, - double NA, double *data, + float64_t NA, float64_t *data, int *na_count) nogil: cdef: int error, diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index b8ca744ac88c4..668bd0ae6bbb7 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -22,9 +22,6 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') cdef float64_t NaN = np.NaN cdef float64_t INF = np.inf -cdef inline int int_max(int a, int b): return a if a >= b else b -cdef inline int int_min(int a, int b): return a if a <= b else b - # 
----------------------------------------------------------------------------- @@ -673,13 +670,6 @@ cdef class BlockMerge(object): self.yi = xi -cdef class BlockIntersection(BlockMerge): - """ - not done yet - """ - pass - - cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a @@ -805,63 +795,6 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" -# ----------------------------------------------------------------------------- -# Indexing operations - -def get_reindexer(ndarray[object, ndim=1] values, dict index_map): - cdef: - object idx - Py_ssize_t i - Py_ssize_t new_length = len(values) - ndarray[int32_t, ndim=1] indexer - - indexer = np.empty(new_length, dtype=np.int32) - - for i in range(new_length): - idx = values[i] - if idx in index_map: - indexer[i] = index_map[idx] - else: - indexer[i] = -1 - - return indexer - -# def reindex_block(ndarray[float64_t, ndim=1] values, -# BlockIndex sparse_index, -# ndarray[int32_t, ndim=1] indexer): -# cdef: -# Py_ssize_t i, length -# ndarray[float64_t, ndim=1] out - -# out = np.empty(length, dtype=np.float64) - -# for i in range(length): -# if indexer[i] == -1: -# pass - - -# cdef class SparseCruncher(object): -# """ -# Class to acquire float pointer for convenient operations on sparse data -# structures -# """ -# cdef: -# SparseIndex index -# float64_t* buf - -# def __init__(self, ndarray[float64_t, ndim=1, mode='c'] values, -# SparseIndex index): - -# self.index = index -# self.buf = values.data - - -def reindex_integer(ndarray[float64_t, ndim=1] values, - IntIndex sparse_index, - ndarray[int32_t, ndim=1] indexer): - pass - - # ----------------------------------------------------------------------------- # SparseArray mask create operations diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d02a985de1d61..1f41096a3f194 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ 
b/pandas/_libs/sparse_op_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for sparse ops WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Sparse op -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- ctypedef fused sparse_t: float64_t diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9012ebefe0975..e346eb7e598ed 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import cython from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, @@ -37,7 +38,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, get_datetime64_nanos, tz_convert_utc_to_tzlocal) -from tslibs.nattype import NaT, nat_strings, iNaT +# many modules still look for NaT and iNaT here despite them not being needed +from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821 from tslibs.nattype cimport checknull_with_nat, NPY_NAT from tslibs.offsets cimport to_offset @@ -71,6 +73,8 @@ cdef inline object create_time_from_ts( return time(dts.hour, dts.min, dts.sec, dts.us, tz) +@cython.wraparound(False) +@cython.boundscheck(False) def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -213,6 +217,8 @@ def _test_parse_iso8601(object ts): return Timestamp(obj.value) +@cython.wraparound(False) +@cython.boundscheck(False) def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None): """ @@ -335,7 +341,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): # then need to iterate try: iresult = values.astype('i8', casting='same_kind', 
copy=False) - mask = iresult == iNaT + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False @@ -351,7 +357,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): "'{unit}'".format(unit=unit)) result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') - iresult[mask] = iNaT + iresult[mask] = NPY_NAT return result result = np.empty(n, dtype='M8[ns]') @@ -449,6 +455,8 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): return oresult +@cython.wraparound(False) +@cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, @@ -752,6 +760,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) +@cython.wraparound(False) +@cython.boundscheck(False) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, dayfirst=False, yearfirst=False): """ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e2914957d01cd..457f5003cb9a5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import enum import warnings from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, @@ -23,7 +24,6 @@ cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) -import enum from fields import get_start_end_field, get_date_name_field from nattype import NaT from nattype cimport NPY_NAT diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bb7af67d14585..f517e0933264a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -9,15 +9,15 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport 
ndarray, double_t, int64_t, float64_t, float32_t +from numpy cimport ndarray, int64_t, float64_t, float32_t cnp.import_array() cdef extern from "src/headers/cmath" namespace "std": - bint isnan(double) nogil - bint notnan(double) nogil - int signbit(double) nogil - double sqrt(double x) nogil + bint isnan(float64_t) nogil + bint notnan(float64_t) nogil + int signbit(float64_t) nogil + float64_t sqrt(float64_t x) nogil cimport util from util cimport numeric @@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF cdef float32_t MAXfloat32 = np.inf cdef float64_t MAXfloat64 = np.inf -cdef double NaN = np.NaN +cdef float64_t NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -80,6 +80,7 @@ def _check_minp(win, minp, N, floor=None): return max(minp, floor) + # original C implementation by N. Devillard. # This code in public domain. # Function : kth_smallest() @@ -352,19 +353,20 @@ def get_window_indexer(values, win, minp, index, closed, right_closed, index, floor) return indexer.get_data() + # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware -def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_count(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, count_x = 0.0 + float64_t val, count_x = 0.0 int64_t s, e, nobs, N Py_ssize_t i, j ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, _ = get_window_indexer(values, win, minp, index, closed) @@ -406,12 +408,15 @@ def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling sum -cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: - cdef double result +cdef inline float64_t 
calc_sum(int64_t minp, int64_t nobs, + float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: result = sum_x @@ -421,7 +426,7 @@ cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: return result -cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil: """ add a value from the sum calc """ # Not NaN @@ -430,7 +435,8 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] + val -cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void remove_sum(float64_t val, + int64_t *nobs, float64_t *sum_x) nogil: """ remove a value from the sum calc """ if notnan(val): @@ -438,15 +444,15 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] - val -def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_sum(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, sum_x = 0 + float64_t val, prev_x, sum_x = 0 int64_t s, e, range_endpoint int64_t nobs = 0, i, j, N bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -511,16 +517,18 @@ def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling mean -cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, - Py_ssize_t neg_ct, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, + Py_ssize_t neg_ct, float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: - result = sum_x / nobs + result = sum_x / nobs if neg_ct == 0 and result < 0: # all positive result = 0 @@ -534,7 +542,7 @@ cdef inline double 
calc_mean(int64_t minp, Py_ssize_t nobs, return result -cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ add a value from the mean calc """ @@ -546,7 +554,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] + 1 -cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ @@ -557,15 +565,15 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_mean(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, result, sum_x = 0 + float64_t val, prev_x, result, sum_x = 0 int64_t s, e bint is_variable Py_ssize_t nobs = 0, i, j, neg_ct = 0, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -627,13 +635,15 @@ def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling variance -cdef inline double calc_var(int64_t minp, int ddof, double nobs, - double ssqdm_x) nogil: - cdef double result +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x) nogil: + cdef: + float64_t result # Variance is unchanged if no observation is added or removed if (nobs >= minp) and (nobs > ddof): @@ -642,7 +652,7 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, if nobs == 1: result = 0 else: - result = ssqdm_x / (nobs - ddof) + result = ssqdm_x / (nobs - ddof) if result < 0: result = 0 else: @@ -651,10 
+661,12 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, return result -cdef inline void add_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ add a value from the var calc """ - cdef double delta + cdef: + float64_t delta + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug if isnan(val): return @@ -667,10 +679,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] -cdef inline void remove_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ remove a value from the var calc """ - cdef double delta + cdef: + float64_t delta if notnan(val): nobs[0] = nobs[0] - 1 @@ -685,18 +698,19 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = 0 -def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. 
""" cdef: - double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old int64_t s, e bint is_variable Py_ssize_t i, j, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -785,13 +799,15 @@ def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, # ---------------------------------------------------------------------- # Rolling skewness -cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, - double xxx) nogil: - cdef double result, dnobs - cdef double A, B, C, R +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B @@ -817,8 +833,9 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ add a value from the skew calc """ # Not NaN @@ -831,8 +848,9 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] + val * val * val -cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ remove a value from the skew calc """ # Not NaN @@ -845,16 +863,16 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] - val * val * val -def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, +def 
roll_skew(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -915,17 +933,20 @@ def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling kurtosis -cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, - double xxx, double xxxx) nogil: - cdef double result, dnobs - cdef double A, B, C, D, R, K +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs R = A * A B = xx / dnobs - R @@ -954,8 +975,9 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ add a value from the kurotic calc """ # Not NaN @@ -969,8 +991,9 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] + val * val * val * val -cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ remove a value from the kurotic calc """ # Not NaN @@ -984,16 +1007,16 @@ cdef inline void remove_kurt(double val, 
int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] - val * val * val * val -def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0, xxxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -1050,6 +1073,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling median, min, max @@ -1057,7 +1081,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, res, prev + float64_t val, res, prev bint err = 0, is_variable int ret = 0 skiplist_t *sl @@ -1065,7 +1089,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, int64_t nobs = 0, N, s, e int midpoint ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs @@ -1130,6 +1154,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, raise MemoryError("skiplist_insert failed") return output + # ---------------------------------------------------------------------- # Moving maximum / minimum code taken from Bottleneck under the terms @@ -1167,7 +1192,8 @@ cdef inline void remove_mm(numeric aold, Py_ssize_t *nobs) nogil: cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, numeric value) nogil: - cdef numeric result + cdef: + numeric result if numeric in cython.floating: if nobs >= minp: @@ 
-1252,7 +1278,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, Py_ssize_t nobs = 0 deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) Q = deque[int64_t]() @@ -1335,7 +1361,7 @@ cdef _roll_min_max_fixed(ndarray[numeric] values, numeric* minvalue numeric* end numeric* last - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) # setup the rings of death! @@ -1427,19 +1453,19 @@ interpolation_types = { def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, int64_t minp, object index, object closed, - double quantile, str interpolation): + float64_t quantile, str interpolation): """ O(N log(window)) implementation using skip list """ cdef: - double val, prev, midpoint, idx_with_fraction + float64_t val, prev, midpoint, idx_with_fraction skiplist_t *skiplist int64_t nobs = 0, i, j, s, e, N Py_ssize_t idx bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output - double vlow, vhigh + ndarray[float64_t] output + float64_t vlow, vhigh InterpolationType interpolation_type int ret = 0 @@ -1529,7 +1555,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, elif interpolation_type == MIDPOINT: vlow = skiplist_get(skiplist, idx, &ret) vhigh = skiplist_get(skiplist, idx + 1, &ret) - output[i] = (vlow + vhigh) / 2 + output[i] = (vlow + vhigh) / 2 else: output[i] = NaN @@ -1543,7 +1569,7 @@ def roll_generic(object obj, int offset, object func, bint raw, object args, object kwargs): cdef: - ndarray[double_t] output, counts, bufarr + ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf @@ -1642,7 +1668,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, Assume len(weights) << len(values) """ cdef: - ndarray[double_t] output, tot_wgt, counts + 
ndarray[float64_t] output, tot_wgt, counts Py_ssize_t in_i, win_i, win_n, win_k, in_n, in_k float64_t val_in, val_win, c, w @@ -1703,7 +1729,8 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, # Exponentially weighted moving average -def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): +def ewma(float64_t[:] vals, float64_t com, + int adjust, int ignore_na, int minp): """ Compute exponentially-weighted moving average using center-of-mass. @@ -1722,8 +1749,8 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): cdef: Py_ssize_t N = len(vals) - ndarray[double_t] output = np.empty(N, dtype=float) - double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur Py_ssize_t i, nobs if N == 0: @@ -1767,12 +1794,13 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): return output + # ---------------------------------------------------------------------- # Exponentially weighted moving covariance -def ewmcov(double_t[:] input_x, double_t[:] input_y, - double_t com, int adjust, int ignore_na, int minp, int bias): +def ewmcov(float64_t[:] input_x, float64_t[:] input_y, + float64_t com, int adjust, int ignore_na, int minp, int bias): """ Compute exponentially-weighted moving variance using center-of-mass. 
@@ -1793,10 +1821,10 @@ def ewmcov(double_t[:] input_x, double_t[:] input_y, cdef: Py_ssize_t N = len(input_x) - double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov - double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y Py_ssize_t i, nobs - ndarray[double_t] output + ndarray[float64_t] output if len(input_y) != N: raise ValueError("arrays are of different lengths " diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 04d7f4d498c2b..c15696705ab82 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from numpy import nan import pytest from pandas._libs.sparse import IntIndex @@ -24,7 +23,8 @@ def kind(request): class TestSparseArray(object): def setup_method(self, method): - self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3, + np.nan, 4, 5, np.nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 89d45639f3e03..bbe4914b5f447 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan import numpy as np from pandas.compat import range @@ -328,7 +327,7 @@ def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) del frame_copy['D'] - frame_copy['C'][:5] = nan + frame_copy['C'][:5] = np.nan added = self.frame + frame_copy diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 668613c494a47..01dee47fffe49 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ 
-7,7 +7,6 @@ import sys import textwrap -from numpy import nan import numpy as np import pytest @@ -49,8 +48,8 @@ def test_repr_mixed_big(self): biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan foo = repr(biggie) # noqa diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 5794630e72419..4f04169d08206 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan from numpy.random import randn import numpy as np @@ -516,8 +515,8 @@ def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) - mat[:5] = nan - mat[-5:] = nan + mat[:5] = np.nan + mat[-5:] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() @@ -533,7 +532,7 @@ def test_first_last_valid(self, data, idx, assert empty.first_valid_index() is None # GH17400: no valid entries - frame[:] = nan + frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b56375d0a8670..cd43cfe34d80b 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -6,7 +6,6 @@ import csv import pytest -from numpy import nan import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) @@ -52,7 +51,7 @@ def test_from_csv_deprecation(self): def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: - self.frame['A'][:5] = nan + self.frame['A'][:5] = np.nan self.frame.to_csv(path) self.frame.to_csv(path, columns=['A', 'B']) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 
d0e1f04238366..a0cc653a28b06 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -9,7 +9,6 @@ import pytest import numpy as np -from numpy import nan import pandas as pd from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, @@ -36,11 +35,11 @@ 'max', ]) def test_cythonized_aggers(op_name): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan], 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan + df.loc[2:10:2, 'C'] = np.nan op = lambda x: getattr(x, op_name)() diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 77e43a346c824..4cce26d135443 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -5,7 +5,6 @@ import operator import numpy as np -from numpy import nan import pytest import pandas.compat as compat @@ -750,12 +749,12 @@ def _check_fill(meth, op, a, b, fill_value=0): with np.errstate(all='ignore'): if amask[i]: if bmask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(fill_value, b[i])) elif bmask[i]: if amask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(a[i], fill_value)) else: @@ -765,8 +764,8 @@ def _check_fill(meth, op, a, b, fill_value=0): expected = Series(exp_values, exp_index) assert_series_equal(result, expected) - a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) - b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) + a = Series([np.nan, 1., 2., 3., np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.], index=np.arange(6)) result = op(a, b) exp = equiv_op(a, b) From 43a558f551555486df3ce495c54157e64113897c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Nov 2018 09:22:06 -0600 Subject: [PATCH 09/11] API: DataFrame.__getitem__ returns Series for sparse 
column (#23561) closes https://github.com/pandas-dev/pandas/issues/23559 --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/dtypes/concat.py | 21 ------------------ pandas/core/frame.py | 3 +-- pandas/tests/frame/test_indexing.py | 27 ++++++++++++++++++----- pandas/tests/sparse/series/test_series.py | 5 ----- 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index eb20d5368ef15..1d91836856888 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -562,6 +562,7 @@ changes were made: - The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. +- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). Some new warnings are issued for operations that require or are likely to materialize a large dense array: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b2999c112e8ab..bb4ab823069ee 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -101,27 +101,6 @@ def _get_frame_result_type(result, objs): ABCSparseDataFrame)) -def _get_sliced_frame_result_type(data, obj): - """ - return appropriate class of Series. When data is sparse - it will return a SparseSeries, otherwise it will return - the Series. 
- - Parameters - ---------- - data : array-like - obj : DataFrame - - Returns - ------- - Series or SparseSeries - """ - if is_sparse(data): - from pandas.core.sparse.api import SparseSeries - return SparseSeries - return obj._constructor_sliced - - def _concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b29725ba2bea..7153f5c2e7007 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -72,7 +72,6 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.concat import _get_sliced_frame_result_type from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex from pandas.core.dtypes.missing import isna, notna @@ -3241,7 +3240,7 @@ def _box_item_values(self, key, values): def _box_col_values(self, values, items): """ provide boxed values for a column """ - klass = _get_sliced_frame_result_type(values, self) + klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) def __setitem__(self, key, value): diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index b0e7fe2e25a6c..78aa853f68459 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2277,19 +2277,34 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[[1, -1], 0] assert_series_equal(df.loc[0.2, 'a'], expect) + def test_getitem_sparse_column(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = pd.SparseArray([0, 1]) + df = pd.DataFrame({"A": data}) + expected = pd.Series(data, name="A") + result = df['A'] + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, 'A'] + tm.assert_series_equal(result, expected) + def test_setitem_with_sparse_value(self): # GH8131 df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]}) - sp_series 
= pd.Series([0, 0, 1]).to_sparse(fill_value=0) - df['new_column'] = sp_series - assert_series_equal(df['new_column'], sp_series, check_names=False) + sp_array = pd.SparseArray([0, 0, 1]) + df['new_column'] = sp_array + assert_series_equal(df['new_column'], + pd.Series(sp_array, name='new_column'), + check_names=False) def test_setitem_with_unaligned_sparse_value(self): df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]}) - sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0]) - .to_sparse(fill_value=0)) + sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0]) df['new_column'] = sp_series - exp = pd.SparseSeries([1, 0, 0], name='new_column') + exp = pd.Series(pd.SparseArray([1, 0, 0]), name='new_column') assert_series_equal(df['new_column'], exp) def test_setitem_with_unaligned_tz_aware_datetime_column(self): diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 9c7dbd85edcbb..fd5dbcd932993 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -160,11 +160,6 @@ def test_construct_DataFrame_with_sp_series(self): df.dtypes str(df) - tm.assert_sp_series_equal(df['col'], self.bseries, check_names=False) - - result = df.iloc[:, 0] - tm.assert_sp_series_equal(result, self.bseries, check_names=False) - # blocking expected = Series({'col': 'float64:sparse'}) result = df.ftypes From da230304097a96d77d0cfd695a66bda6349be853 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 11 Nov 2018 07:40:42 -0800 Subject: [PATCH 10/11] BUG: Delegate more of Excel parsing to CSV (#23544) The idea is that we read the Excel file, get the data, and then let the TextParser handle the reading and parsing. 
We shouldn't be doing a lot of work that is already defined in parsers.py In doing so, we identified several bugs: * index_col=None was not being respected * usecols behavior was inconsistent with that of read_csv for list of strings and callable inputs * usecols was not being validated as proper Excel column names when passed as a string. Closes gh-18273. Closes gh-20480. --- doc/source/io.rst | 29 +- doc/source/whatsnew/v0.24.0.txt | 3 + pandas/io/excel.py | 194 ++++--- pandas/tests/io/test_excel.py | 955 +++++++++++++++++--------------- 4 files changed, 670 insertions(+), 511 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 5d29e349e2898..beb1c1daba962 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2861,7 +2861,13 @@ to be parsed. read_excel('path_to_file.xls', 'Sheet1', usecols=2) -If `usecols` is a list of integers, then it is assumed to be the file column +You can also specify a comma-delimited set of Excel columns and ranges as a string: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols='A,C:E') + +If ``usecols`` is a list of integers, then it is assumed to be the file column indices to be parsed. .. code-block:: python @@ -2870,6 +2876,27 @@ indices to be parsed. Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. +.. versionadded:: 0.24 + +If ``usecols`` is a list of strings, it is assumed that each string corresponds +to a column name provided either by the user in ``names`` or inferred from the +document header row(s). Those strings define which columns will be parsed: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols=['foo', 'bar']) + +Element order is ignored, so ``usecols=['baz', 'joe']`` is the same as ``['joe', 'baz']``. + +.. versionadded:: 0.24 + +If ``usecols`` is callable, the callable function will be evaluated against +the column names, returning names where the callable function evaluates to ``True``. + +.. 
code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', usecols=lambda x: x.isalpha()) + Parsing Dates +++++++++++++ diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1d91836856888..b4e959518245f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -238,6 +238,7 @@ Other Enhancements - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) +- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) .. 
_whatsnew_0240.api_breaking: @@ -1300,6 +1301,8 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) - Bug in :meth:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) - Bug in :meth:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) +- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`) +- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) Plotting ^^^^^^^^ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 7a7b801f4ba4a..2e93c237bb7ea 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -17,8 +17,7 @@ import pandas._libs.json as json import pandas.compat as compat from pandas.compat import ( - OrderedDict, add_metaclass, lrange, map, range, reduce, string_types, u, - zip) + OrderedDict, add_metaclass, lrange, map, range, string_types, u, zip) from pandas.errors import EmptyDataError from pandas.util._decorators import Appender, deprecate_kwarg @@ -93,13 +92,22 @@ .. deprecated:: 0.21.0 Pass in `usecols` instead. -usecols : int or list, default None - * If None then parse all columns, - * If int then indicates last column to be parsed - * If list of ints then indicates list of column numbers to be parsed - * If string then indicates comma separated list of Excel column letters and - column ranges (e.g. "A:E" or "A,C,E:F"). 
Ranges are inclusive of +usecols : int, str, list-like, or callable, default None + * If None, then parse all columns. + * If int, then indicates last column to be parsed. + * If string, then indicates comma separated list of Excel column letters + and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of both sides. + * If list of ints, then indicates list of column numbers to be parsed. + * If list of strings, then indicates list of column names to be parsed. + + .. versionadded:: 0.24.0 + + * If callable, then evaluate each column name against it and parse the + column if the callable returns ``True``. + + .. versionadded:: 0.24.0 + squeeze : boolean, default False If the parsed data only contains one column then return a Series dtype : Type name or dict of column -> type, default None @@ -466,39 +474,6 @@ def parse(self, convert_float=convert_float, **kwds) - def _should_parse(self, i, usecols): - - def _range2cols(areas): - """ - Convert comma separated list of column names and column ranges to a - list of 0-based column indexes.
- - >>> _range2cols('A:E') - [0, 1, 2, 3, 4] - >>> _range2cols('A,C,Z:AB') - [0, 2, 25, 26, 27] - """ - def _excel2num(x): - "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, - x.upper().strip(), 0) - 1 - - cols = [] - for rng in areas.split(','): - if ':' in rng: - rng = rng.split(':') - cols += lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1) - else: - cols.append(_excel2num(rng)) - return cols - - if isinstance(usecols, int): - return i <= usecols - elif isinstance(usecols, compat.string_types): - return i in _range2cols(usecols) - else: - return i in usecols - def _parse_excel(self, sheet_name=0, header=0, @@ -527,10 +502,6 @@ def _parse_excel(self, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates is True and index_col is None: - warnings.warn("The 'parse_dates=True' keyword of read_excel was " - "provided without an 'index_col' keyword value.") - import xlrd from xlrd import (xldate, XL_CELL_DATE, XL_CELL_ERROR, XL_CELL_BOOLEAN, @@ -620,17 +591,13 @@ def _parse_cell(cell_contents, cell_typ): sheet = self.book.sheet_by_index(asheetname) data = [] - should_parse = {} + usecols = _maybe_convert_usecols(usecols) for i in range(sheet.nrows): row = [] for j, (value, typ) in enumerate(zip(sheet.row_values(i), sheet.row_types(i))): - if usecols is not None and j not in should_parse: - should_parse[j] = self._should_parse(j, usecols) - - if usecols is None or should_parse[j]: - row.append(_parse_cell(value, typ)) + row.append(_parse_cell(value, typ)) data.append(row) if sheet.nrows == 0: @@ -642,24 +609,22 @@ def _parse_cell(cell_contents, cell_typ): # forward fill and pull out names for MultiIndex column header_names = None - if header is not None: - if is_list_like(header): - header_names = [] - control_row = [True] * len(data[0]) - for row in header: - if is_integer(skiprows): - row += skiprows - - data[row], control_row = _fill_mi_header( - 
data[row], control_row) - header_name, data[row] = _pop_header_name( - data[row], index_col) - header_names.append(header_name) - else: - data[header] = _trim_excel_header(data[header]) + if header is not None and is_list_like(header): + header_names = [] + control_row = [True] * len(data[0]) + + for row in header: + if is_integer(skiprows): + row += skiprows + + data[row], control_row = _fill_mi_header( + data[row], control_row) + header_name, _ = _pop_header_name( + data[row], index_col) + header_names.append(header_name) if is_list_like(index_col): - # forward fill values for MultiIndex index + # Forward fill values for MultiIndex index. if not is_list_like(header): offset = 1 + header else: @@ -667,6 +632,7 @@ def _parse_cell(cell_contents, cell_typ): for col in index_col: last = data[offset][col] + for row in range(offset + 1, len(data)): if data[row][col] == '' or data[row][col] is None: data[row][col] = last @@ -693,11 +659,14 @@ def _parse_cell(cell_contents, cell_typ): thousands=thousands, comment=comment, skipfooter=skipfooter, + usecols=usecols, **kwds) output[asheetname] = parser.read(nrows=nrows) + if names is not None: output[asheetname].columns = names + if not squeeze or isinstance(output[asheetname], DataFrame): output[asheetname].columns = output[ asheetname].columns.set_names(header_names) @@ -726,6 +695,97 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() +def _excel2num(x): + """ + Convert Excel column name like 'AB' to 0-based column index. + + Parameters + ---------- + x : str + The Excel column name to convert to a 0-based column index. + + Returns + ------- + num : int + The column index corresponding to the name. + + Raises + ------ + ValueError + Part of the Excel column name was invalid. 
+ """ + index = 0 + + for c in x.upper().strip(): + cp = ord(c) + + if cp < ord("A") or cp > ord("Z"): + raise ValueError("Invalid column name: {x}".format(x=x)) + + index = index * 26 + cp - ord("A") + 1 + + return index - 1 + + +def _range2cols(areas): + """ + Convert comma separated list of column names and ranges to indices. + + Parameters + ---------- + areas : str + A string containing a sequence of column ranges (or areas). + + Returns + ------- + cols : list + A list of 0-based column indices. + + Examples + -------- + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + cols = [] + + for rng in areas.split(","): + if ":" in rng: + rng = rng.split(":") + cols.extend(lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) + else: + cols.append(_excel2num(rng)) + + return cols + + +def _maybe_convert_usecols(usecols): + """ + Convert `usecols` into a compatible format for parsing in `parsers.py`. + + Parameters + ---------- + usecols : object + The use-columns object to potentially convert. + + Returns + ------- + converted : object + The compatible format of `usecols`. 
+ """ + if usecols is None: + return usecols + + if is_integer(usecols): + return lrange(usecols + 1) + + if isinstance(usecols, compat.string_types): + return _range2cols(usecols) + + return usecols + + def _validate_freeze_panes(freeze_panes): if freeze_panes is not None: if ( diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4bff39f8c7efc..49a3a3d58672d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -179,6 +179,65 @@ def test_usecols_str(self, ext): tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) + @pytest.mark.parametrize("usecols", [ + [0, 1, 3], [0, 3, 1], + [1, 0, 3], [1, 3, 0], + [3, 0, 1], [3, 1, 0], + ]) + def test_usecols_diff_positional_int_columns_order(self, ext, usecols): + expected = self.get_csv_refdf("test1")[["A", "C"]] + result = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.parametrize("usecols", [ + ["B", "D"], ["D", "B"] + ]) + def test_usecols_diff_positional_str_columns_order(self, ext, usecols): + expected = self.get_csv_refdf("test1")[["B", "D"]] + expected.index = range(len(expected)) + + result = self.get_exceldf("test1", ext, "Sheet1", usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_read_excel_without_slicing(self, ext): + expected = self.get_csv_refdf("test1") + result = self.get_exceldf("test1", ext, "Sheet1", index_col=0) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str(self, ext): + expected = self.get_csv_refdf("test1")[["C", "D"]] + result = self.get_exceldf("test1", ext, "Sheet1", + index_col=0, usecols="A,D:E") + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str_invalid(self, ext): + msg = "Invalid column name: E1" + + with pytest.raises(ValueError, match=msg): + 
self.get_exceldf("test1", ext, "Sheet1", usecols="D:E1") + + def test_index_col_label_error(self, ext): + msg = "list indices must be integers.*, not str" + + with pytest.raises(TypeError, match=msg): + self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], + usecols=["A", "C"]) + + def test_usecols_pass_non_existent_column(self, ext): + msg = ("Usecols do not match columns, " + "columns expected but not found: " + r"\['E'\]") + + with pytest.raises(ValueError, match=msg): + self.get_exceldf("test1", ext, usecols=["E"]) + + def test_usecols_wrong_type(self, ext): + msg = ("'usecols' must either be list-like of " + "all strings, all unicode, all integers or a callable.") + + with pytest.raises(ValueError, match=msg): + self.get_exceldf("test1", ext, usecols=["E1", 0]) + def test_excel_stop_iterator(self, ext): parsed = self.get_exceldf('test2', ext, 'Sheet1') @@ -446,63 +505,48 @@ def test_read_excel_blank_with_header(self, ext): actual = self.get_exceldf('blank_with_header', ext, 'Sheet1') tm.assert_frame_equal(actual, expected) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') - # GH 12292 : error when read one empty column from excel file - def test_read_one_empty_col_no_header(self, ext): + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([np.nan] * 4)), + (0, DataFrame({"Unnamed: 0": [np.nan] * 3})) + ]) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" df = pd.DataFrame( [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]] ) + with ensure_clean(ext) as path: - df.to_excel(path, 'no_header', index=False, header=False) - actual_header_none = read_excel( - path, - 'no_header', - usecols=[0], - header=None - ) - - actual_header_zero = read_excel( - path, - 'no_header', - usecols=[0], - header=0 - ) - expected = DataFrame() - tm.assert_frame_equal(actual_header_none, expected) - tm.assert_frame_equal(actual_header_zero, 
expected) + df.to_excel(path, filename, index=False, header=False) + result = read_excel(path, filename, usecols=[0], header=header) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') - def test_read_one_empty_col_with_header(self, ext): + tm.assert_frame_equal(result, expected) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize("header,expected", [ + (None, DataFrame([0] + [np.nan] * 4)), + (0, DataFrame([np.nan] * 4)) + ]) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" df = pd.DataFrame( [["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]] ) + with ensure_clean(ext) as path: df.to_excel(path, 'with_header', index=False, header=True) - actual_header_none = read_excel( - path, - 'with_header', - usecols=[0], - header=None - ) - - actual_header_zero = read_excel( - path, - 'with_header', - usecols=[0], - header=0 - ) - expected_header_none = DataFrame(pd.Series([0], dtype='int64')) - tm.assert_frame_equal(actual_header_none, expected_header_none) - expected_header_zero = DataFrame(columns=[0]) - tm.assert_frame_equal(actual_header_zero, expected_header_zero) + result = read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) @td.skip_if_no('openpyxl') @td.skip_if_no('xlwt') @@ -539,29 +583,33 @@ def test_date_conversion_overflow(self, ext): result = self.get_exceldf('testdateoverflow', ext) tm.assert_frame_equal(result, expected) - @td.skip_if_no('xlrd', '1.0.1') # GH-22682 + @td.skip_if_no("xlrd", "1.0.1") # see gh-22682 def test_sheet_name_and_sheetname(self, ext): - # GH10559: Minor improvement: Change "sheet_name" to "sheetname" - # GH10969: DOC: Consistent var names (sheetname vs sheet_name) - # GH12604: CLN GH10559 Rename sheetname variable to sheet_name - # GH20920: ExcelFile.parse() and pd.read_xlsx() have different - # behavior for "sheetname" argument - dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 
ext, - sheet_name='Sheet1') # doc + # gh-10559: Minor improvement: Change "sheet_name" to "sheetname" + # gh-10969: DOC: Consistent var names (sheetname vs sheet_name) + # gh-12604: CLN GH10559 Rename sheetname variable to sheet_name + # gh-20920: ExcelFile.parse() and pd.read_xlsx() have different + # behavior for "sheetname" argument + filename = "test1" + sheet_name = "Sheet1" + + df_ref = self.get_csv_refdf(filename) + df1 = self.get_exceldf(filename, ext, + sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2 = self.get_exceldf('test1', ext, - sheetname='Sheet1') # bkwrd compat + df2 = self.get_exceldf(filename, ext, index_col=0, + sheetname=sheet_name) # backward compat - excel = self.get_excelfile('test1', ext) - df1_parse = excel.parse(sheet_name='Sheet1') # doc + excel = self.get_excelfile(filename, ext) + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df2_parse = excel.parse(sheetname='Sheet1') # bkwrd compat + df2_parse = excel.parse(index_col=0, + sheetname=sheet_name) # backward compat - tm.assert_frame_equal(df1, dfref, check_names=False) - tm.assert_frame_equal(df2, dfref, check_names=False) - tm.assert_frame_equal(df1_parse, dfref, check_names=False) - tm.assert_frame_equal(df2_parse, dfref, check_names=False) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + tm.assert_frame_equal(df1_parse, df_ref, check_names=False) + tm.assert_frame_equal(df2_parse, df_ref, check_names=False) def test_sheet_name_both_raises(self, ext): with pytest.raises(TypeError, match="Cannot specify both"): @@ -594,20 +642,24 @@ def test_excel_read_buffer(self, ext): actual = read_excel(xls, 'Sheet1', index_col=0) tm.assert_frame_equal(expected, actual) - @td.skip_if_no('xlwt') - def test_read_xlrd_Book(self, ext): + @td.skip_if_no("xlwt") + def 
test_read_xlrd_book(self, ext): import xlrd - df = self.frame - with ensure_clean('.xls') as pth: - df.to_excel(pth, "SheetA") + + engine = "xlrd" + sheet_name = "SheetA" + + with ensure_clean(ext) as pth: + df.to_excel(pth, sheet_name) book = xlrd.open_workbook(pth) - with ExcelFile(book, engine="xlrd") as xl: - result = read_excel(xl, "SheetA") + with ExcelFile(book, engine=engine) as xl: + result = read_excel(xl, sheet_name, index_col=0) tm.assert_frame_equal(df, result) - result = read_excel(book, sheet_name="SheetA", engine="xlrd") + result = read_excel(book, sheet_name=sheet_name, + engine=engine, index_col=0) tm.assert_frame_equal(df, result) @tm.network @@ -618,17 +670,18 @@ def test_read_from_http_url(self, ext): local_table = self.get_exceldf('test1', ext) tm.assert_frame_equal(url_table, local_table) - @td.skip_if_no('s3fs') + @td.skip_if_no("s3fs") @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext): - boto3 = pytest.importorskip('boto3') - moto = pytest.importorskip('moto') + moto = pytest.importorskip("moto") + boto3 = pytest.importorskip("boto3") with moto.mock_s3(): conn = boto3.resource("s3", region_name="us-east-1") conn.create_bucket(Bucket="pandas-test") file_name = os.path.join(self.dirpath, 'test1' + ext) - with open(file_name, 'rb') as f: + + with open(file_name, "rb") as f: conn.Bucket("pandas-test").put_object(Key="test1" + ext, Body=f) @@ -695,17 +748,18 @@ def test_reader_closes_file(self, ext): assert f.closed - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") def test_creating_and_reading_multiple_sheets(self, ext): - # Test reading multiple sheets, from a runtime created excel file - # with multiple sheets. - # See PR #9450 - def tdf(sheetname): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. 
+ def tdf(col_sheet_name): d, i = [11, 22, 33], [1, 2, 3] - return DataFrame(d, i, columns=[sheetname]) + return DataFrame(d, i, columns=[col_sheet_name]) - sheets = ['AAA', 'BBB', 'CCC'] + sheets = ["AAA", "BBB", "CCC"] dfs = [tdf(s) for s in sheets] dfs = dict(zip(sheets, dfs)) @@ -714,7 +768,9 @@ def tdf(sheetname): with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) - dfs_returned = read_excel(pth, sheet_name=sheets) + + dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) + for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) @@ -756,206 +812,206 @@ def test_reader_seconds(self, ext): tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, ext): - # GH 4679 - mi = MultiIndex.from_product([['foo', 'bar'], ['a', 'b']]) - mi_file = os.path.join(self.dirpath, 'testmultiindex' + ext) - - expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True], - [2, 3.5, pd.Timestamp('2015-01-02'), False], - [3, 4.5, pd.Timestamp('2015-01-03'), False], - [4, 5.5, pd.Timestamp('2015-01-04'), True]], + # see gh-4679 + mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) + mi_file = os.path.join(self.dirpath, "testmultiindex" + ext) + + # "mi_column" sheet + expected = DataFrame([[1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True]], columns=mi) - actual = read_excel(mi_file, 'mi_column', header=[0, 1]) - tm.assert_frame_equal(actual, expected) - actual = read_excel(mi_file, 'mi_column', header=[0, 1], index_col=0) + actual = read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - expected.columns = ['a', 'b', 'c', 'd'] + # "mi_index" sheet expected.index = mi - actual = read_excel(mi_file, 'mi_index', index_col=[0, 1]) + expected.columns = ["a", "b", "c", "d"] + + actual = read_excel(mi_file, "mi_index", 
index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) + # "both" sheet expected.columns = mi - actual = read_excel(mi_file, 'both', index_col=[0, 1], header=[0, 1]) + + actual = read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - expected.columns = ['a', 'b', 'c', 'd'] - actual = read_excel(mi_file, 'mi_index_name', index_col=[0, 1]) + # "mi_index_name" sheet + expected.columns = ["a", "b", "c", "d"] + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = read_excel(mi_file, "mi_index_name", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) + # "mi_column_name" sheet expected.index = list(range(4)) - expected.columns = mi.set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'mi_column_name', + expected.columns = mi.set_names(["c1", "c2"]) + actual = read_excel(mi_file, "mi_column_name", header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - # Issue #11317 + # see gh-11317 + # "name_with_int" sheet expected.columns = mi.set_levels( - [1, 2], level=1).set_names(['c1', 'c2']) - actual = read_excel(mi_file, 'name_with_int', + [1, 2], level=1).set_names(["c1", "c2"]) + + actual = read_excel(mi_file, "name_with_int", index_col=0, header=[0, 1]) tm.assert_frame_equal(actual, expected) - expected.columns = mi.set_names(['c1', 'c2']) - expected.index = mi.set_names(['ilvl1', 'ilvl2']) - actual = read_excel(mi_file, 'both_name', - index_col=[0, 1], header=[0, 1]) - tm.assert_frame_equal(actual, expected) + # "both_name" sheet + expected.columns = mi.set_names(["c1", "c2"]) + expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = read_excel(mi_file, 'both_name', + actual = read_excel(mi_file, "both_name", index_col=[0, 1], header=[0, 1]) tm.assert_frame_equal(actual, expected) - actual = read_excel(mi_file, 'both_name_skiprows', index_col=[0, 1], + # "both_skiprows" sheet + actual = 
read_excel(mi_file, "both_name_skiprows", index_col=[0, 1], header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) - @td.skip_if_no('xlsxwriter') + @td.skip_if_no("xlsxwriter") def test_read_excel_multiindex_empty_level(self, ext): - # GH 12453 - with ensure_clean('.xlsx') as path: + # see gh-12453 + with ensure_clean(ext) as path: df = DataFrame({ - ('One', 'x'): {0: 1}, - ('Two', 'X'): {0: 3}, - ('Two', 'Y'): {0: 7}, - ('Zero', ''): {0: 0} + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", ""): {0: 0} }) expected = DataFrame({ - ('One', u'x'): {0: 1}, - ('Two', u'X'): {0: 3}, - ('Two', u'Y'): {0: 7}, - ('Zero', 'Unnamed: 3_level_1'): {0: 0} + ("One", u"x"): {0: 1}, + ("Two", u"X"): {0: 3}, + ("Two", u"Y"): {0: 7}, + ("Zero", "Unnamed: 4_level_1"): {0: 0} }) df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) + actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) df = pd.DataFrame({ - ('Beg', ''): {0: 0}, - ('Middle', 'x'): {0: 1}, - ('Tail', 'X'): {0: 3}, - ('Tail', 'Y'): {0: 7} + ("Beg", ""): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7} }) expected = pd.DataFrame({ - ('Beg', 'Unnamed: 0_level_1'): {0: 0}, - ('Middle', u'x'): {0: 1}, - ('Tail', u'X'): {0: 3}, - ('Tail', u'Y'): {0: 7} + ("Beg", "Unnamed: 1_level_1"): {0: 0}, + ("Middle", u"x"): {0: 1}, + ("Tail", u"X"): {0: 3}, + ("Tail", u"Y"): {0: 7} }) df.to_excel(path) - actual = pd.read_excel(path, header=[0, 1]) + actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - @td.skip_if_no('xlsxwriter') - def test_excel_multindex_roundtrip(self, ext): - # GH 4679 - with ensure_clean('.xlsx') as pth: - for c_idx_names in [True, False]: - for r_idx_names in [True, False]: - for c_idx_levels in [1, 3]: - for r_idx_levels in [1, 3]: - # column index name can't be serialized unless - # MultiIndex - if (c_idx_levels == 1 and c_idx_names): - 
continue - - # empty name case current read in as unnamed - # levels, not Nones - check_names = True - if not r_idx_names and r_idx_levels > 1: - check_names = False - - df = mkdf(5, 5, c_idx_names, - r_idx_names, c_idx_levels, - r_idx_levels) - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + @td.skip_if_no("xlsxwriter") + @pytest.mark.parametrize("c_idx_names", [True, False]) + @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_levels", [1, 3]) + @pytest.mark.parametrize("r_idx_levels", [1, 3]) + def test_excel_multindex_roundtrip(self, ext, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels): + # see gh-4679 + with ensure_clean(ext) as pth: + if c_idx_levels == 1 and c_idx_names: + pytest.skip("Column index name cannot be " + "serialized unless it's a MultiIndex") + + # Empty name case current read in as + # unnamed levels, not Nones. + check_names = r_idx_names or r_idx_levels <= 1 + + df = mkdf(5, 5, c_idx_names, r_idx_names, + c_idx_levels, r_idx_levels) + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + tm.assert_frame_equal(df, act, check_names=check_names) - df.iloc[0, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + tm.assert_frame_equal(df, act, check_names=check_names) - df.iloc[-1, :] = np.nan - df.to_excel(pth) - act = pd.read_excel( - pth, index_col=list(range(r_idx_levels)), + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel(pth, index_col=list(range(r_idx_levels)), header=list(range(c_idx_levels))) - tm.assert_frame_equal( - df, act, check_names=check_names) + 
tm.assert_frame_equal(df, act, check_names=check_names) def test_excel_old_index_format(self, ext): # see gh-4679 - filename = 'test_index_name_pre17' + ext + filename = "test_index_name_pre17" + ext in_file = os.path.join(self.dirpath, filename) # We detect headers to determine if index names exist, so # that "index" name in the "names" version of the data will # now be interpreted as rows that include null data. data = np.array([[None, None, None, None, None], - ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], - ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], - ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], - ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], - ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) - columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] - mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1', - 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'], - ['R1', 'R_l1_g0', 'R_l1_g1', - 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"]]) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex(levels=[["R0", "R_l0_g0", "R_l0_g1", + "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R1", "R_l1_g0", "R_l1_g1", + "R_l1_g2", "R_l1_g3", "R_l1_g4"]], labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], names=[None, None]) - si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name=None) + si = Index(["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, 'single_names') + actual = pd.read_excel(in_file, "single_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, 'multi_names') + actual = pd.read_excel(in_file, "multi_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # 
The analogous versions of the "names" version data # where there are explicitly no names for the indices. - data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], - ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], - ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], - ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], - ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) - columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] - mi = MultiIndex(levels=[['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], - ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', - 'R_l1_g3', 'R_l1_g4']], + data = np.array([["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"]]) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex(levels=[["R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], + ["R_l1_g0", "R_l1_g1", "R_l1_g2", + "R_l1_g3", "R_l1_g4"]], labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], names=[None, None]) - si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name=None) + si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", + "R_l0_g3", "R_l0_g4"], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(in_file, 'single_no_names') + actual = pd.read_excel(in_file, "single_no_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) + actual = pd.read_excel(in_file, "multi_no_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self, ext): @@ -971,33 +1027,28 @@ def test_read_excel_chunksize(self, ext): pd.read_excel(os.path.join(self.dirpath, 'test1' + ext), chunksize=100) - @td.skip_if_no('openpyxl') - @td.skip_if_no('xlwt') + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") def 
test_read_excel_parse_dates(self, ext): - # GH 11544, 12051 + # see gh-11544, gh-12051 df = DataFrame( - {'col': [1, 2, 3], - 'date_strings': pd.date_range('2012-01-01', periods=3)}) + {"col": [1, 2, 3], + "date_strings": pd.date_range("2012-01-01", periods=3)}) df2 = df.copy() - df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") with ensure_clean(ext) as pth: df2.to_excel(pth) - res = read_excel(pth) + res = read_excel(pth, index_col=0) tm.assert_frame_equal(df2, res) - # no index_col specified when parse_dates is True - with tm.assert_produces_warning(): - res = read_excel(pth, parse_dates=True) - tm.assert_frame_equal(df2, res) - - res = read_excel(pth, parse_dates=['date_strings'], index_col=0) + res = read_excel(pth, parse_dates=["date_strings"], index_col=0) tm.assert_frame_equal(df, res) - dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') - res = read_excel(pth, parse_dates=['date_strings'], - date_parser=dateparser, index_col=0) + date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") + res = read_excel(pth, parse_dates=["date_strings"], + date_parser=date_parser, index_col=0) tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self, ext): @@ -1106,26 +1157,29 @@ class and any subclasses, on account of the `autouse=True` class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. 
- def test_excel_sheet_by_name_raise(self, merge_cells, engine, ext): + def test_excel_sheet_by_name_raise(self, *_): import xlrd gt = DataFrame(np.random.randn(10, 2)) gt.to_excel(self.path) + xl = ExcelFile(self.path) - df = read_excel(xl, 0) + df = read_excel(xl, 0, index_col=0) + tm.assert_frame_equal(gt, df) with pytest.raises(xlrd.XLRDError): - read_excel(xl, '0') + read_excel(xl, "0") - def test_excelwriter_contextmanager(self, merge_cells, engine, ext): + def test_excel_writer_context_manager(self, *_): with ExcelWriter(self.path) as writer: - self.frame.to_excel(writer, 'Data1') - self.frame2.to_excel(writer, 'Data2') + self.frame.to_excel(writer, "Data1") + self.frame2.to_excel(writer, "Data2") with ExcelFile(self.path) as reader: - found_df = read_excel(reader, 'Data1') - found_df2 = read_excel(reader, 'Data2') + found_df = read_excel(reader, "Data1", index_col=0) + found_df2 = read_excel(reader, "Data2", index_col=0) + tm.assert_frame_equal(found_df, self.frame) tm.assert_frame_equal(found_df2, self.frame2) @@ -1182,12 +1236,13 @@ def test_mixed(self, merge_cells, engine, ext): recons = read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) - def test_tsframe(self, merge_cells, engine, ext): + def test_ts_frame(self, *_): df = tm.makeTimeDataFrame()[:5] - df.to_excel(self.path, 'test1') + df.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, merge_cells, engine, ext): @@ -1200,21 +1255,25 @@ def test_basics_with_nan(self, merge_cells, engine, ext): @pytest.mark.parametrize("np_type", [ np.int8, np.int16, np.int32, np.int64]) def test_int_types(self, merge_cells, engine, ext, np_type): - # Test np.int values read come back as int (rather than float - # which is Excel's format). 
+ # Test np.int values read come back as int + # (rather than float which is Excel's format). frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = read_excel(self.path, 'test1') + + recons2 = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) - # test with convert_float=False comes back as float + # Test with convert_float=False comes back as float. float_frame = frame.astype(float) - recons = read_excel(self.path, 'test1', convert_float=False) + recons = read_excel(self.path, "test1", + convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, check_index_type=False, check_column_type=False) @@ -1224,25 +1283,31 @@ def test_int_types(self, merge_cells, engine, ext, np_type): def test_float_types(self, merge_cells, engine, ext, np_type): # Test np.float values read come back as float. frame = DataFrame(np.random.random_sample(10), dtype=np_type) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1').astype(np_type) + recons = read_excel(reader, "test1", index_col=0).astype(np_type) + tm.assert_frame_equal(frame, recons, check_dtype=False) @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. 
frame = (DataFrame([1, 0, True, False], dtype=np_type)) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1').astype(np_type) + recons = read_excel(reader, "test1", index_col=0).astype(np_type) + tm.assert_frame_equal(frame, recons) - def test_inf_roundtrip(self, merge_cells, engine, ext): + def test_inf_roundtrip(self, *_): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) - frame.to_excel(self.path, 'test1') + frame.to_excel(self.path, "test1") + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(frame, recons) def test_sheets(self, merge_cells, engine, ext): @@ -1353,37 +1418,41 @@ def test_excel_roundtrip_indexname(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) assert result.index.name == 'foo' - def test_excel_roundtrip_datetime(self, merge_cells, engine, ext): + def test_excel_roundtrip_datetime(self, merge_cells, *_): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(self.path, 'test1', merge_cells=merge_cells) + tsf.to_excel(self.path, "test1", merge_cells=merge_cells) + reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + recons = read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(self.tsframe, recons) - # GH4133 - excel output format strings def test_excel_date_datetime_format(self, merge_cells, engine, ext): + # see gh-4133 + # + # Excel output format strings df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], - index=['DATE', 'DATETIME'], columns=['X', 'Y']) + index=["DATE", "DATETIME"], columns=["X", "Y"]) df_expected = DataFrame([[datetime(2014, 1, 31), datetime(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], - 
index=['DATE', 'DATETIME'], columns=['X', 'Y']) + index=["DATE", "DATETIME"], columns=["X", "Y"]) with ensure_clean(ext) as filename2: writer1 = ExcelWriter(self.path) writer2 = ExcelWriter(filename2, - date_format='DD.MM.YYYY', - datetime_format='DD.MM.YYYY HH-MM-SS') + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS") - df.to_excel(writer1, 'test1') - df.to_excel(writer2, 'test1') + df.to_excel(writer1, "test1") + df.to_excel(writer2, "test1") writer1.close() writer2.close() @@ -1391,54 +1460,66 @@ def test_excel_date_datetime_format(self, merge_cells, engine, ext): reader1 = ExcelFile(self.path) reader2 = ExcelFile(filename2) - rs1 = read_excel(reader1, 'test1', index_col=None) - rs2 = read_excel(reader2, 'test1', index_col=None) + rs1 = read_excel(reader1, "test1", index_col=0) + rs2 = read_excel(reader2, "test1", index_col=0) tm.assert_frame_equal(rs1, rs2) - # since the reader returns a datetime object for dates, we need - # to use df_expected to check the result + # Since the reader returns a datetime object for dates, + # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected) - def test_to_excel_interval_no_labels(self, merge_cells, engine, ext): - # GH19242 - test writing Interval without labels + def test_to_excel_interval_no_labels(self, *_): + # see gh-19242 + # + # Test writing Interval without labels. 
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() - frame['new'] = pd.cut(frame[0], 10) - expected['new'] = pd.cut(expected[0], 10).astype(str) - frame.to_excel(self.path, 'test1') + + frame["new"] = pd.cut(frame[0], 10) + expected["new"] = pd.cut(expected[0], 10).astype(str) + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_interval_labels(self, merge_cells, engine, ext): - # GH19242 - test writing Interval with labels + def test_to_excel_interval_labels(self, *_): + # see gh-19242 + # + # Test writing Interval with labels. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() - intervals = pd.cut(frame[0], 10, labels=['A', 'B', 'C', 'D', 'E', - 'F', 'G', 'H', 'I', 'J']) - frame['new'] = intervals - expected['new'] = pd.Series(list(intervals)) - frame.to_excel(self.path, 'test1') + intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", + "F", "G", "H", "I", "J"]) + frame["new"] = intervals + expected["new"] = pd.Series(list(intervals)) + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) - def test_to_excel_timedelta(self, merge_cells, engine, ext): - # GH 19242, GH9155 - test writing timedelta to xls + def test_to_excel_timedelta(self, *_): + # see gh-19242, gh-9155 + # + # Test writing timedelta to xls. 
frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), - columns=['A'], - dtype=np.int64 - ) + columns=["A"], dtype=np.int64) expected = frame.copy() - frame['new'] = frame['A'].apply(lambda x: timedelta(seconds=x)) - expected['new'] = expected['A'].apply( + + frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) + expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) - frame.to_excel(self.path, 'test1') + + frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) - recons = read_excel(reader, 'test1') + + recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, merge_cells, engine, ext): @@ -1543,53 +1624,54 @@ def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) - def test_to_excel_float_format(self, merge_cells, engine, ext): + def test_to_excel_float_format(self, *_): df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - df.to_excel(self.path, 'test1', float_format='%.2f') + index=["A", "B"], columns=["X", "Y", "Z"]) + df.to_excel(self.path, "test1", float_format="%.2f") reader = ExcelFile(self.path) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + result = read_excel(reader, "test1", index_col=0) + + expected = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=["A", "B"], columns=["X", "Y", "Z"]) + tm.assert_frame_equal(result, expected) def test_to_excel_output_encoding(self, merge_cells, engine, ext): - # avoid mixed inferred_type - df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], - [u'\u0195', u'\u0196', u'\u0197']], - index=[u'A\u0192', u'B'], - columns=[u'X\u0193', 
u'Y', u'Z']) - - with ensure_clean('__tmp_to_excel_float_format__.' + ext) as filename: - df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') - result = read_excel(filename, 'TestSheet', encoding='utf8') + # Avoid mixed inferred_type. + df = DataFrame([[u"\u0192", u"\u0193", u"\u0194"], + [u"\u0195", u"\u0196", u"\u0197"]], + index=[u"A\u0192", u"B"], + columns=[u"X\u0193", u"Y", u"Z"]) + + with ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: + df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") + result = read_excel(filename, "TestSheet", + encoding="utf8", index_col=0) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, merge_cells, engine, ext): - with ensure_clean(u('\u0192u.') + ext) as filename: + with ensure_clean(u("\u0192u.") + ext) as filename: try: - f = open(filename, 'wb') + f = open(filename, "wb") except UnicodeEncodeError: - pytest.skip('no unicode file names on this system') + pytest.skip("No unicode file names on this system") else: f.close() df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - df.to_excel(filename, 'test1', float_format='%.2f') + index=["A", "B"], columns=["X", "Y", "Z"]) + df.to_excel(filename, "test1", float_format="%.2f") reader = ExcelFile(filename) - rs = read_excel(reader, 'test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + result = read_excel(reader, "test1", index_col=0) + + expected = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=["A", "B"], columns=["X", "Y", "Z"]) + tm.assert_frame_equal(result, expected) # def test_to_excel_header_styling_xls(self, merge_cells, engine, ext): @@ -1691,106 +1773,83 @@ def test_to_excel_unicode_filename(self, merge_cells, engine, ext): # assert ws.cell(maddr).merged # os.remove(filename) - def 
test_excel_010_hemstring(self, merge_cells, engine, ext): - if merge_cells: - pytest.skip('Skip tests for merged MI format.') + @pytest.mark.parametrize("use_headers", [True, False]) + @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) + @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3]) + def test_excel_010_hemstring(self, merge_cells, engine, ext, + c_idx_nlevels, r_idx_nlevels, use_headers): - from pandas.util.testing import makeCustomDataframe as mkdf - # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 + def roundtrip(data, header=True, parser_hdr=0, index=True): + data.to_excel(self.path, header=header, + merge_cells=merge_cells, index=index) - def roundtrip(df, header=True, parser_hdr=0, index=True): - - df.to_excel(self.path, header=header, - merge_cells=merge_cells, index=index) xf = ExcelFile(self.path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res - - nrows = 5 - ncols = 3 - for use_headers in (True, False): - for i in range(1, 4): # row multindex up to nlevel=3 - for j in range(1, 4): # col "" - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - - # this if will be removed once multi column excel writing - # is implemented for now fixing #9794 - if j > 1: - with pytest.raises(NotImplementedError): - res = roundtrip(df, use_headers, index=False) - else: - res = roundtrip(df, use_headers) - - if use_headers: - assert res.shape == (nrows, ncols + i) - else: - # first row taken as columns - assert res.shape == (nrows - 1, ncols + i) + return read_excel(xf, xf.sheet_names[0], header=parser_hdr) - # no nans - for r in range(len(res.index)): - for c in range(len(res.columns)): - assert res.iloc[r, c] is not np.nan + # Basic test. 
+ parser_header = 0 if use_headers else None + res = roundtrip(DataFrame([0]), use_headers, parser_header) - res = roundtrip(DataFrame([0])) - assert res.shape == (1, 1) - assert res.iloc[0, 0] is not np.nan - - res = roundtrip(DataFrame([0]), False, None) assert res.shape == (1, 2) assert res.iloc[0, 0] is not np.nan - def test_excel_010_hemstring_raises_NotImplementedError(self, merge_cells, - engine, ext): - # This test was failing only for j>1 and header=False, - # So I reproduced a simple test. - if merge_cells: - pytest.skip('Skip tests for merged MI format.') + # More complex tests with multi-index. + nrows = 5 + ncols = 3 from pandas.util.testing import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 - # override of #2370 until sorted out in 0.11 + # override of gh-2370 until sorted out in 0.11 - def roundtrip2(df, header=True, parser_hdr=0, index=True): + df = mkdf(nrows, ncols, r_idx_nlevels=r_idx_nlevels, + c_idx_nlevels=c_idx_nlevels) - df.to_excel(self.path, header=header, - merge_cells=merge_cells, index=index) - xf = ExcelFile(self.path) - res = read_excel(xf, xf.sheet_names[0], header=parser_hdr) - return res + # This if will be removed once multi-column Excel writing + # is implemented. For now fixing gh-9794. + if c_idx_nlevels > 1: + with pytest.raises(NotImplementedError): + roundtrip(df, use_headers, index=False) + else: + res = roundtrip(df, use_headers) - nrows = 5 - ncols = 3 - j = 2 - i = 1 - df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) - with pytest.raises(NotImplementedError): - roundtrip2(df, header=False, index=False) + if use_headers: + assert res.shape == (nrows, ncols + r_idx_nlevels) + else: + # First row taken as columns. + assert res.shape == (nrows - 1, ncols + r_idx_nlevels) + + # No NaNs. 
+ for r in range(len(res.index)): + for c in range(len(res.columns)): + assert res.iloc[r, c] is not np.nan - def test_duplicated_columns(self, merge_cells, engine, ext): - # Test for issue #5235 + def test_duplicated_columns(self, *_): + # see gh-5235 write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - colnames = ['A', 'B', 'B'] + col_names = ["A", "B", "B"] - write_frame.columns = colnames - write_frame.to_excel(self.path, 'test1') + write_frame.columns = col_names + write_frame.to_excel(self.path, "test1") + + read_frame = read_excel(self.path, "test1", index_col=0) + read_frame.columns = col_names - read_frame = read_excel(self.path, 'test1') - read_frame.columns = colnames tm.assert_frame_equal(write_frame, read_frame) - # 11007 / #10970 + # see gh-11007, gh-10970 write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'A', 'B']) - write_frame.to_excel(self.path, 'test1') - read_frame = read_excel(self.path, 'test1') - read_frame.columns = ['A', 'B', 'A', 'B'] + columns=["A", "B", "A", "B"]) + write_frame.to_excel(self.path, "test1") + + read_frame = read_excel(self.path, "test1", index_col=0) + read_frame.columns = ["A", "B", "A", "B"] + tm.assert_frame_equal(write_frame, read_frame) - # 10982 - write_frame.to_excel(self.path, 'test1', index=False, header=False) - read_frame = read_excel(self.path, 'test1', header=None) + # see gh-10982 + write_frame.to_excel(self.path, "test1", index=False, header=False) + read_frame = read_excel(self.path, "test1", header=None) + write_frame.columns = [0, 1, 2, 3] tm.assert_frame_equal(write_frame, read_frame) @@ -1805,36 +1864,40 @@ def test_swapped_columns(self, merge_cells, engine, ext): tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) - def test_invalid_columns(self, merge_cells, engine, ext): - # 10982 - write_frame = DataFrame({'A': [1, 1, 1], - 'B': [2, 2, 2]}) + def test_invalid_columns(self, *_): + # see gh-10982 + 
write_frame = DataFrame({"A": [1, 1, 1], + "B": [2, 2, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - write_frame.to_excel(self.path, 'test1', columns=['B', 'C']) - expected = write_frame.reindex(columns=['B', 'C']) - read_frame = read_excel(self.path, 'test1') + write_frame.to_excel(self.path, "test1", columns=["B", "C"]) + + expected = write_frame.reindex(columns=["B", "C"]) + read_frame = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises(KeyError): - write_frame.to_excel(self.path, 'test1', columns=['C', 'D']) + write_frame.to_excel(self.path, "test1", columns=["C", "D"]) - def test_comment_arg(self, merge_cells, engine, ext): - # Re issue #18735 - # Test the comment argument functionality to read_excel + def test_comment_arg(self, *_): + # see gh-18735 + # + # Test the comment argument functionality to read_excel. - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(self.path, 'test_c') + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], + "B": ["two", "two", "#two"]}) + df.to_excel(self.path, "test_c") + + # Read file without comment arg. 
+ result1 = read_excel(self.path, "test_c", index_col=0) - # Read file without comment arg - result1 = read_excel(self.path, 'test_c') result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = read_excel(self.path, 'test_c', comment='#') + + result2 = read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) def test_comment_default(self, merge_cells, engine, ext): @@ -1851,22 +1914,23 @@ def test_comment_default(self, merge_cells, engine, ext): result2 = read_excel(self.path, 'test_c', comment=None) tm.assert_frame_equal(result1, result2) - def test_comment_used(self, merge_cells, engine, ext): - # Re issue #18735 - # Test the comment argument is working as expected when used + def test_comment_used(self, *_): + # see gh-18735 + # + # Test the comment argument is working as expected when used. - # Create file to read in - df = DataFrame({'A': ['one', '#one', 'one'], - 'B': ['two', 'two', '#two']}) - df.to_excel(self.path, 'test_c') + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], + "B": ["two", "two", "#two"]}) + df.to_excel(self.path, "test_c") - # Test read_frame_comment against manually produced expected output - expected = DataFrame({'A': ['one', None, 'one'], - 'B': ['two', None, None]}) - result = read_excel(self.path, 'test_c', comment='#') + # Test read_frame_comment against manually produced expected output. 
+ expected = DataFrame({"A": ["one", None, "one"], + "B": ["two", None, None]}) + result = read_excel(self.path, "test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) - def test_comment_emptyline(self, merge_cells, engine, ext): + def test_comment_empty_line(self, merge_cells, engine, ext): # Re issue #18735 # Test that read_excel ignores commented lines at the end of file @@ -1899,64 +1963,69 @@ def test_datetimes(self, merge_cells, engine, ext): tm.assert_series_equal(write_frame['A'], read_frame['A']) - # GH7074 def test_bytes_io(self, merge_cells, engine, ext): + # see gh-7074 bio = BytesIO() df = DataFrame(np.random.randn(10, 2)) - # pass engine explicitly as there is no file path to infer from + + # Pass engine explicitly, as there is no file path to infer from. writer = ExcelWriter(bio, engine=engine) df.to_excel(writer) writer.save() + bio.seek(0) - reread_df = read_excel(bio) + reread_df = read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) - # GH8188 - def test_write_lists_dict(self, merge_cells, engine, ext): - df = DataFrame({'mixed': ['a', ['b', 'c'], {'d': 'e', 'f': 2}], - 'numeric': [1, 2, 3.0], - 'str': ['apple', 'banana', 'cherry']}) + def test_write_lists_dict(self, *_): + # see gh-8188. 
+ df = DataFrame({"mixed": ["a", ["b", "c"], {"d": "e", "f": 2}], + "numeric": [1, 2, 3.0], + "str": ["apple", "banana", "cherry"]}) + df.to_excel(self.path, "Sheet1") + read = read_excel(self.path, "Sheet1", header=0, index_col=0) + expected = df.copy() expected.mixed = expected.mixed.apply(str) - expected.numeric = expected.numeric.astype('int64') + expected.numeric = expected.numeric.astype("int64") - df.to_excel(self.path, 'Sheet1') - read = read_excel(self.path, 'Sheet1', header=0) tm.assert_frame_equal(read, expected) - # GH13347 - def test_true_and_false_value_options(self, merge_cells, engine, ext): - df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) - expected = df.replace({'foo': True, - 'bar': False}) + def test_true_and_false_value_options(self, *_): + # see gh-13347 + df = pd.DataFrame([["foo", "bar"]], columns=["col1", "col2"]) + expected = df.replace({"foo": True, "bar": False}) df.to_excel(self.path) - read_frame = read_excel(self.path, true_values=['foo'], - false_values=['bar']) + read_frame = read_excel(self.path, true_values=["foo"], + false_values=["bar"], index_col=0) tm.assert_frame_equal(read_frame, expected) - def test_freeze_panes(self, merge_cells, engine, ext): - # GH15160 - expected = DataFrame([[1, 2], [3, 4]], columns=['col1', 'col2']) + def test_freeze_panes(self, *_): + # see gh-15160 + expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) - result = read_excel(self.path) - tm.assert_frame_equal(expected, result) - def test_path_pathlib(self, merge_cells, engine, ext): + result = read_excel(self.path, index_col=0) + tm.assert_frame_equal(result, expected) + + def test_path_path_lib(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel) + + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(ext)) - 
tm.assert_frame_equal(df, result) + path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) - def test_path_localpath(self, merge_cells, engine, ext): + def test_path_local_path(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel) + + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, - path="foo.{}".format(ext)) - tm.assert_frame_equal(df, result) + path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) @td.skip_if_no('openpyxl') From 58a59bd606911f11a4f679df18a00cfc25536c30 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 11 Nov 2018 08:20:04 -0800 Subject: [PATCH 11/11] BUG: Casting tz-aware DatetimeIndex to object-dtype ndarray/Index (#23524) --- doc/source/whatsnew/v0.24.0.txt | 4 ++ pandas/_libs/tslibs/offsets.pyx | 9 +++- pandas/core/arrays/datetimes.py | 10 ++++ pandas/core/indexes/base.py | 14 ++++-- pandas/tests/arrays/test_datetimelike.py | 48 ++++++++++++++++++++ pandas/tests/indexes/test_base.py | 12 ++++- pandas/tests/tseries/offsets/test_offsets.py | 13 ++++++ pandas/tests/tseries/offsets/test_ticks.py | 22 +++++++++ pandas/tseries/offsets.py | 31 ++++++++++--- 9 files changed, 149 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b4e959518245f..3c05d4d90e44d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1128,6 +1128,9 @@ Datetimelike - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) +- Bug in :class:`DatetimeIndex` where calling 
``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) Timedelta ^^^^^^^^^ @@ -1174,6 +1177,7 @@ Offsets - Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) - Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) +- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`) Numeric ^^^^^^^ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0495202818eb5..7ef38cba0c37f 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -308,8 +308,13 @@ class _BaseOffset(object): def __eq__(self, other): if is_string_object(other): - other = to_offset(other) - + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. 
"infer" + return False try: return self._params == other._params except AttributeError: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 405056c628ceb..08b83598bb6af 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, is_object_dtype, + is_int64_dtype, is_datetime64tz_dtype, is_datetime64_dtype, ensure_int64) @@ -388,6 +389,15 @@ def _resolution(self): # ---------------------------------------------------------------- # Array-like Methods + def __array__(self, dtype=None): + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + elif is_int64_dtype(dtype): + return self.asi8 + + # TODO: warn that conversion may be lossy? + return self._data.view(np.ndarray) # follow Index.__array__ + def __iter__(self): """ Return an iterator over the boxed values diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8470bc6fec490..263de57d32f31 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -301,11 +301,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, (dtype is not None and is_datetime64_any_dtype(dtype)) or 'tz' in kwargs): from pandas import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name, - dtype=dtype, **kwargs) + if dtype is not None and is_dtype_equal(_o_dtype, dtype): - return Index(result.to_pydatetime(), dtype=_o_dtype) + # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, + # will raise in the where `data` is already tz-aware. So + # we leave it out of this step and cast to object-dtype after + # the DatetimeIndex construction. 
+ # Note we can pass copy=False because the .astype below + # will always make a copy + result = DatetimeIndex(data, copy=False, name=name, **kwargs) + return result.astype(object) else: + result = DatetimeIndex(data, copy=copy, name=name, + dtype=dtype, **kwargs) return result elif (is_timedelta64_dtype(data) or diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3fd03a351de7c..5ba99a48e34ad 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -57,6 +57,54 @@ def timedelta_index(request): class TestDatetimeArray(object): + def test_array_object_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_array(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = dti.asi8.view('M8[ns]') + result = np.array(arr) + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, copy=False) + assert result.base is expected.base + assert result.base is not None + + def test_array_i8_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + + expected = dti.asi8 + result = np.array(arr, dtype='i8') + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, 
dtype='i8', copy=False) + assert result.base is expected.base + assert result.base is not None + def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture dti = pd.date_range('2016-01-01', periods=3, tz=tz) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 666420a6a9b06..4a3efe22926f7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype): @pytest.mark.parametrize("cast_as_obj", [True, False]) @pytest.mark.parametrize("index", [ pd.date_range('2015-01-01 10:00', freq='D', periods=3, - tz='US/Eastern'), # DTI with tz + tz='US/Eastern', name='Green Eggs & Ham'), # DTI with tz pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz pd.timedelta_range('1 days', freq='D', periods=3), # td pd.period_range('2015-01-01', freq='D', periods=3) # period @@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index): tm.assert_index_equal(result, index) - if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'): + if isinstance(index, pd.DatetimeIndex): assert result.tz == index.tz + if cast_as_obj: + # GH#23524 check that Index(dti, dtype=object) does not + # incorrectly raise ValueError, and that nanoseconds are not + # dropped + index += pd.Timedelta(nanoseconds=50) + result = pd.Index(index, dtype=object) + assert result.dtype == np.object_ + assert list(result) == list(index) @pytest.mark.parametrize("index,has_tz", [ (pd.date_range('2015-01-01 10:00', freq='D', periods=3, diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index cbd3e0903b713..d68dd65c9841b 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -187,6 +187,19 @@ def testMult2(self): assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50) assert self.d + (-3 * self._offset(-2)) == 
self.d + self._offset(6) + def test_compare_str(self): + # GH#23524 + # comparing to strings that cannot be cast to DateOffsets should + # not raise for __eq__ or __ne__ + if self._offset is None: + return + off = self._get_offset(self._offset) + + assert not off == "infer" + assert off != "foo" + # Note: inequalities are only implemented for Tick subclasses; + # tests for this are in test_ticks + class TestCommon(Base): # exected value created by Base._get_offset diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 369c0971f1e9a..128010fe6d32c 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -267,3 +267,25 @@ def test_compare_ticks(cls): assert cls(4) > three assert cls(3) == cls(3) assert cls(3) != cls(4) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_compare_ticks_to_strs(cls): + # GH#23524 + off = cls(19) + + # These tests should work with any strings, but we particularly are + # interested in "infer" as that comparison is convenient to make in + # Datetime/Timedelta Array/Index constructors + assert not off == "infer" + assert not "foo" == off + + for left, right in [("infer", off), (off, "infer")]: + with pytest.raises(TypeError): + left < right + with pytest.raises(TypeError): + left <= right + with pytest.raises(TypeError): + left > right + with pytest.raises(TypeError): + left >= right diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 53719b71d1180..25c419e485db1 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2199,9 +2199,18 @@ def apply_index(self, i): def _tick_comp(op): + assert op not in [operator.eq, operator.ne] + def f(self, other): - return op(self.delta, other.delta) + try: + return op(self.delta, other.delta) + except AttributeError: + # comparing with a non-Tick object + raise TypeError("Invalid comparison between {cls} and {typ}" + .format(cls=type(self).__name__, + 
typ=type(other).__name__)) + f.__name__ = '__{opname}__'.format(opname=op.__name__) return f @@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False): __ge__ = _tick_comp(operator.ge) __lt__ = _tick_comp(operator.lt) __le__ = _tick_comp(operator.le) - __eq__ = _tick_comp(operator.eq) - __ne__ = _tick_comp(operator.ne) def __add__(self, other): if isinstance(other, Tick): @@ -2242,8 +2249,13 @@ def __add__(self, other): def __eq__(self, other): if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset - - other = to_offset(other) + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False if isinstance(other, Tick): return self.delta == other.delta @@ -2258,8 +2270,13 @@ def __hash__(self): def __ne__(self, other): if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset - - other = to_offset(other) + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return True if isinstance(other, Tick): return self.delta != other.delta