From b9503d054ca8c1d7a5e6ba9d1327639d1cee2589 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 16 Jan 2024 17:48:25 -0500 Subject: [PATCH 1/4] ENH: Allow performance warnings to be disabled --- pandas/core/arrays/datetimelike.py | 15 +++++++++------ pandas/core/arrays/datetimes.py | 14 +++++++++----- pandas/core/arrays/sparse/array.py | 7 +++++-- pandas/core/computation/align.py | 8 +++++++- pandas/core/config_init.py | 13 +++++++++++++ pandas/core/dtypes/dtypes.py | 6 +++++- pandas/core/indexes/multi.py | 14 ++++++++------ pandas/core/internals/managers.py | 6 +++++- pandas/core/reshape/reshape.py | 4 +++- pandas/io/pytables.py | 3 ++- 10 files changed, 66 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 44049f73b792b..db006d8d9a532 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -20,6 +20,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import ( algos, lib, @@ -1331,12 +1333,13 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray # If both 1D then broadcasting is unambiguous return op(self, other[0]) - warnings.warn( - "Adding/subtracting object-dtype array to " - f"{type(self).__name__} not vectorized.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if _get_option("performance_warnings"): + warnings.warn( + "Adding/subtracting object-dtype array to " + f"{type(self).__name__} not vectorized.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) # Caller is responsible for broadcasting if necessary assert self.shape == other.shape, (self.shape, other.shape) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cd6689e9f1ce2..87505d883a34b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,6 +15,8 @@ import numpy as np +from pandas._config.config import _get_option + from 
pandas._libs import ( lib, tslib, @@ -819,11 +821,13 @@ def _add_offset(self, offset: BaseOffset) -> Self: # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]" res_values = res_values.view(values.dtype) # type: ignore[arg-type] except NotImplementedError: - warnings.warn( - "Non-vectorized DateOffset being applied to Series or DatetimeIndex.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if _get_option("performance_warnings"): + warnings.warn( + "Non-vectorized DateOffset being applied to Series or " + "DatetimeIndex.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) res_values = self.astype("O") + offset # TODO(GH#55564): as_unit will be unnecessary result = type(self)._from_sequence(res_values).as_unit(self.unit) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fafeedc01b02b..9150a011b1bc5 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -18,6 +18,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import lib import pandas._libs.sparse as splib from pandas._libs.sparse import ( @@ -1154,8 +1156,9 @@ def searchsorted( side: Literal["left", "right"] = "left", sorter: NumpySorter | None = None, ) -> npt.NDArray[np.intp] | np.intp: - msg = "searchsorted requires high memory usage." - warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + if _get_option("performance_warnings"): + msg = "searchsorted requires high memory usage." 
+ warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) v = np.asarray(v) return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index cd852ba9249cf..add46bd51f1e3 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -15,6 +15,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -124,7 +126,11 @@ def _align_core(terms): reindexer_size = len(reindexer) ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) - if ordm >= 1 and reindexer_size >= 10000: + if ( + _get_option("performance_warnings") + and ordm >= 1 + and reindexer_size >= 10000 + ): w = ( f"Alignment difference on axis {axis} is larger " f"than an order of magnitude on term {repr(terms[i].name)}, " diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0a9d5af7cbd42..b61266b45659e 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -499,6 +499,19 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory([None, "warn", "raise"]), ) +performance_warning = """ +: boolean + Whether to show or hide PerformanceWarnings. 
+""" + +with cf.config_prefix("mode"): + cf.register_option( + "performance_warnings", + True, + performance_warning, + validator=is_bool, + ) + string_storage_doc = """ : string diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e90e92fa0ee1c..0c9e118a417c7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -21,6 +21,8 @@ import numpy as np import pytz +from pandas._config.config import _get_option + from pandas._libs import ( lib, missing as libmissing, @@ -2030,7 +2032,9 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # np.nan isn't a singleton, so we may end up with multiple # NaNs here, so we ignore the all NA case too. - if not (len(set(fill_values)) == 1 or isna(fill_values).all()): + if _get_option("performance_warnings") and ( + not (len(set(fill_values)) == 1 or isna(fill_values).all()) + ): warnings.warn( "Concatenating sparse arrays with multiple fill " f"values: '{fill_values}'. Picking the first and " diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 25bcc1f307082..ccff7e4b634a4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,6 +21,7 @@ import numpy as np from pandas._config import get_option +from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -2428,7 +2429,7 @@ def drop( # type: ignore[override] step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if self._lexsort_depth == 0: + if _get_option("performance_warning") and self._lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -3086,11 +3087,12 @@ def _maybe_to_slice(loc): if not follow_key: return slice(start, stop) - warnings.warn( - "indexing past lexsort depth may impact performance.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if 
_get_option("performance_warning"): + warnings.warn( + "indexing past lexsort depth may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) loc = np.arange(start, stop, dtype=np.intp) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d08dee3663395..08dfb9af23163 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -20,6 +20,7 @@ using_copy_on_write, warn_copy_on_write, ) +from pandas._config.config import _get_option from pandas._libs import ( internals as libinternals, @@ -1392,7 +1393,10 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: self._known_consolidated = False - if sum(not block.is_extension for block in self.blocks) > 100: + if ( + _get_option("performance_warning") + and sum(not block.is_extension for block in self.blocks) > 100 + ): warnings.warn( "DataFrame is highly fragmented. This is usually the result " "of calling `frame.insert` many times, which has poor performance. " diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3493f1c78da91..e4dcad9a5bab0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -9,6 +9,8 @@ import numpy as np +from pandas._config.config import _get_option + import pandas._libs.reshape as libreshape from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -143,7 +145,7 @@ def __init__( num_cells = num_rows * num_columns # GH 26314: Previous ValueError raised was too restrictive for many users. 
- if num_cells > np.iinfo(np.int32).max: + if _get_option("performance_warning") and num_cells > np.iinfo(np.int32).max: warnings.warn( f"The following operation may generate {num_cells} cells " f"in the resulting pandas object.", diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 36c9b66f2bd47..9d81faa12c548 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -33,6 +33,7 @@ using_copy_on_write, using_pyarrow_string_dtype, ) +from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -3162,7 +3163,7 @@ def write_array( pass elif inferred_type == "string": pass - else: + elif _get_option("performance_warning"): ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) From 1c4f797677b779077a1aaacae6004833b4a1f212 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 16 Jan 2024 17:48:25 -0500 Subject: [PATCH 2/4] ENH: Allow performance warnings to be disabled --- doc/source/whatsnew/v2.3.0.rst | 2 +- pandas/core/arrays/arrow/_arrow_utils.py | 11 +++++++---- pandas/core/arrays/datetimelike.py | 15 +++++++++------ pandas/core/arrays/datetimes.py | 14 +++++++++----- pandas/core/arrays/sparse/array.py | 7 +++++-- pandas/core/computation/align.py | 8 +++++++- pandas/core/config_init.py | 13 +++++++++++++ pandas/core/dtypes/dtypes.py | 6 +++++- pandas/core/indexes/multi.py | 14 ++++++++------ pandas/core/internals/managers.py | 6 +++++- pandas/core/reshape/reshape.py | 4 +++- pandas/io/pytables.py | 3 ++- 12 files changed, 74 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index e217e8c8557bb..09cf2cedab0ba 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`???`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py index 2a053fac2985c..01e496945fba5 100644 --- a/pandas/core/arrays/arrow/_arrow_utils.py +++ b/pandas/core/arrays/arrow/_arrow_utils.py @@ -5,6 +5,8 @@ import numpy as np import pyarrow +from pandas._config.config import _get_option + from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -14,10 +16,11 @@ def fallback_performancewarning(version: str | None = None) -> None: Raise a PerformanceWarning for falling back to ExtensionArray's non-pyarrow method """ - msg = "Falling back on a non-pyarrow code path which may decrease performance." - if version is not None: - msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning." - warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + if _get_option("performance_warnings"): + msg = "Falling back on a non-pyarrow code path which may decrease performance." + if version is not None: + msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning." 
+ warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) def pyarrow_array_to_numpy_and_mask( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 44049f73b792b..db006d8d9a532 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -20,6 +20,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import ( algos, lib, @@ -1331,12 +1333,13 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray # If both 1D then broadcasting is unambiguous return op(self, other[0]) - warnings.warn( - "Adding/subtracting object-dtype array to " - f"{type(self).__name__} not vectorized.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if _get_option("performance_warnings"): + warnings.warn( + "Adding/subtracting object-dtype array to " + f"{type(self).__name__} not vectorized.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) # Caller is responsible for broadcasting if necessary assert self.shape == other.shape, (self.shape, other.shape) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cd6689e9f1ce2..87505d883a34b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,6 +15,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import ( lib, tslib, @@ -819,11 +821,13 @@ def _add_offset(self, offset: BaseOffset) -> Self: # "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]" res_values = res_values.view(values.dtype) # type: ignore[arg-type] except NotImplementedError: - warnings.warn( - "Non-vectorized DateOffset being applied to Series or DatetimeIndex.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if _get_option("performance_warnings"): + warnings.warn( + "Non-vectorized DateOffset being applied to Series or " + "DatetimeIndex.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) 
res_values = self.astype("O") + offset # TODO(GH#55564): as_unit will be unnecessary result = type(self)._from_sequence(res_values).as_unit(self.unit) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fafeedc01b02b..9150a011b1bc5 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -18,6 +18,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import lib import pandas._libs.sparse as splib from pandas._libs.sparse import ( @@ -1154,8 +1156,9 @@ def searchsorted( side: Literal["left", "right"] = "left", sorter: NumpySorter | None = None, ) -> npt.NDArray[np.intp] | np.intp: - msg = "searchsorted requires high memory usage." - warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) + if _get_option("performance_warnings"): + msg = "searchsorted requires high memory usage." + warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) v = np.asarray(v) return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index cd852ba9249cf..add46bd51f1e3 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -15,6 +15,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -124,7 +126,11 @@ def _align_core(terms): reindexer_size = len(reindexer) ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) - if ordm >= 1 and reindexer_size >= 10000: + if ( + _get_option("performance_warnings") + and ordm >= 1 + and reindexer_size >= 10000 + ): w = ( f"Alignment difference on axis {axis} is larger " f"than an order of magnitude on term {repr(terms[i].name)}, " diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0a9d5af7cbd42..6ac570a4961b2 100644 --- 
a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -499,6 +499,19 @@ def use_inf_as_na_cb(key) -> None: validator=is_one_of_factory([None, "warn", "raise"]), ) +performance_warnings = """ +: boolean + Whether to show or hide PerformanceWarnings. +""" + +with cf.config_prefix("mode"): + cf.register_option( + "performance_warnings", + True, + performance_warnings, + validator=is_bool, + ) + string_storage_doc = """ : string diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e90e92fa0ee1c..0c9e118a417c7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -21,6 +21,8 @@ import numpy as np import pytz +from pandas._config.config import _get_option + from pandas._libs import ( lib, missing as libmissing, @@ -2030,7 +2032,9 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: # np.nan isn't a singleton, so we may end up with multiple # NaNs here, so we ignore the all NA case too. - if not (len(set(fill_values)) == 1 or isna(fill_values).all()): + if _get_option("performance_warnings") and ( + not (len(set(fill_values)) == 1 or isna(fill_values).all()) + ): warnings.warn( "Concatenating sparse arrays with multiple fill " f"values: '{fill_values}'. 
Picking the first and " diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 25bcc1f307082..daf0588eb5a4e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,6 +21,7 @@ import numpy as np from pandas._config import get_option +from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -2428,7 +2429,7 @@ def drop( # type: ignore[override] step = loc.step if loc.step is not None else 1 inds.extend(range(loc.start, loc.stop, step)) elif com.is_bool_indexer(loc): - if self._lexsort_depth == 0: + if _get_option("performance_warnings") and self._lexsort_depth == 0: warnings.warn( "dropping on a non-lexsorted multi-index " "without a level parameter may impact performance.", @@ -3086,11 +3087,12 @@ def _maybe_to_slice(loc): if not follow_key: return slice(start, stop) - warnings.warn( - "indexing past lexsort depth may impact performance.", - PerformanceWarning, - stacklevel=find_stack_level(), - ) + if _get_option("performance_warnings"): + warnings.warn( + "indexing past lexsort depth may impact performance.", + PerformanceWarning, + stacklevel=find_stack_level(), + ) loc = np.arange(start, stop, dtype=np.intp) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d08dee3663395..f493356ee8895 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -20,6 +20,7 @@ using_copy_on_write, warn_copy_on_write, ) +from pandas._config.config import _get_option from pandas._libs import ( internals as libinternals, @@ -1392,7 +1393,10 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: self._known_consolidated = False - if sum(not block.is_extension for block in self.blocks) > 100: + if ( + _get_option("performance_warnings") + and sum(not block.is_extension for block in self.blocks) > 100 + ): warnings.warn( "DataFrame is highly fragmented. 
This is usually the result " "of calling `frame.insert` many times, which has poor performance. " diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3493f1c78da91..aca49065659df 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -9,6 +9,8 @@ import numpy as np +from pandas._config.config import _get_option + import pandas._libs.reshape as libreshape from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -143,7 +145,7 @@ def __init__( num_cells = num_rows * num_columns # GH 26314: Previous ValueError raised was too restrictive for many users. - if num_cells > np.iinfo(np.int32).max: + if _get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max: warnings.warn( f"The following operation may generate {num_cells} cells " f"in the resulting pandas object.", diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 36c9b66f2bd47..80fc2e5a98deb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -33,6 +33,7 @@ using_copy_on_write, using_pyarrow_string_dtype, ) +from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -3162,7 +3163,7 @@ def write_array( pass elif inferred_type == "string": pass - else: + elif _get_option("performance_warnings"): ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) From 1077112d845c7311d53515843ab3bcd10e5a5a04 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 5 Feb 2024 20:50:16 -0500 Subject: [PATCH 3/4] Add tests --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/conftest.py | 10 +++ pandas/core/arrays/string_arrow.py | 8 +- pandas/tests/arithmetic/test_datetime64.py | 31 +++---- pandas/tests/arithmetic/test_period.py | 25 +++--- pandas/tests/arithmetic/test_timedelta64.py | 69 +++++++++------- pandas/tests/computation/test_eval.py | 10 ++- pandas/tests/extension/test_sparse.py | 8 +- 
pandas/tests/frame/indexing/test_indexing.py | 5 +- pandas/tests/frame/indexing/test_insert.py | 4 +- pandas/tests/frame/methods/test_drop.py | 6 +- pandas/tests/frame/test_block_internals.py | 8 +- pandas/tests/frame/test_stack_unstack.py | 7 +- pandas/tests/groupby/test_groupby.py | 9 +-- .../indexes/datetimes/methods/test_shift.py | 4 +- pandas/tests/indexes/multi/test_drop.py | 6 +- pandas/tests/indexes/multi/test_indexing.py | 9 +-- pandas/tests/indexes/multi/test_sorting.py | 9 +-- pandas/tests/indexing/multiindex/test_loc.py | 10 +-- .../indexing/multiindex/test_multiindex.py | 7 +- pandas/tests/io/pytables/test_put.py | 4 +- pandas/tests/io/pytables/test_round_trip.py | 4 +- pandas/tests/io/pytables/test_store.py | 4 +- pandas/tests/reshape/concat/test_index.py | 12 ++- pandas/tests/reshape/test_pivot.py | 8 +- pandas/tests/strings/test_find_replace.py | 81 ++++++++++++------- pandas/tests/tseries/offsets/test_dst.py | 25 ++++-- pandas/tests/tseries/offsets/test_offsets.py | 30 ++++--- 28 files changed, 228 insertions(+), 186 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f9117253b61c1..8f74d078bd78a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/conftest.py b/pandas/conftest.py index db251a07aeb5d..6a1af3d91d64b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1966,6 +1966,16 @@ def using_copy_on_write() -> bool: return True +@pytest.fixture(params=[True, False]) +def performance_warning(request) -> bool: + """ + Fixture to check if performance warnings are enabled. Either produces + ``PerformanceWarning`` if they are enabled, otherwise ``False``. + """ + with pd.option_context("mode.performance_warnings", request.param): + yield pd.errors.PerformanceWarning if request.param else False + + @pytest.fixture def warn_copy_on_write() -> bool: """ diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a76eef8095695..6b1b8e45d389a 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -13,6 +13,8 @@ import numpy as np +from pandas._config.config import _get_option + from pandas._libs import ( lib, missing as libmissing, @@ -354,7 +356,8 @@ def _str_contains( self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True ): if flags: - fallback_performancewarning() + if _get_option("mode.performance_warnings"): + fallback_performancewarning() return super()._str_contains(pat, case, flags, na, regex) if regex: @@ -414,7 +417,8 @@ def _str_replace( regex: bool = True, ): if isinstance(pat, re.Pattern) or callable(repl) or not case or flags: - fallback_performancewarning() + if _get_option("mode.performance_warnings"): + fallback_performancewarning() return super()._str_replace(pat, repl, n, case, flags, regex) func = pc.replace_substring_regex if regex else pc.replace_substring diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 2dafaf277be8f..953c4fa139d17 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -18,7 
+18,6 @@ from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.offsets import shift_months -from pandas.errors import PerformanceWarning import pandas as pd from pandas import ( @@ -1008,14 +1007,16 @@ def test_dt64arr_sub_NaT(self, box_with_array, unit): # ------------------------------------------------------------- # Subtraction of datetime-like array-like - def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): + def test_dt64arr_sub_dt64object_array( + self, performance_warning, box_with_array, tz_naive_fixture + ): dti = date_range("2016-01-01", periods=3, tz=tz_naive_fixture) expected = dti - dti obj = tm.box_expected(dti, box_with_array) expected = tm.box_expected(expected, box_with_array).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = obj - obj.astype(object) tm.assert_equal(result, expected) @@ -1493,7 +1494,7 @@ def test_dt64arr_add_sub_DateOffsets( ) @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) def test_dt64arr_add_sub_offset_array( - self, tz_naive_fixture, box_with_array, op, other + self, performance_warning, tz_naive_fixture, box_with_array, op, other ): # GH#18849 # GH#10699 array of offsets @@ -1505,7 +1506,7 @@ def test_dt64arr_add_sub_offset_array( expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))]) expected = tm.box_expected(expected, box_with_array).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = op(dtarr, other) tm.assert_equal(res, expected) @@ -1514,7 +1515,7 @@ def test_dt64arr_add_sub_offset_array( if box_with_array is pd.array and op is roperator.radd: # We expect a NumpyExtensionArray, not ndarray[object] here expected = pd.array(expected, dtype=object) - with tm.assert_produces_warning(PerformanceWarning): + with 
tm.assert_produces_warning(performance_warning): res = op(dtarr, other) tm.assert_equal(res, expected) @@ -2303,7 +2304,7 @@ def test_dti_add_series(self, tz_naive_fixture, names): @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) def test_dti_addsub_offset_arraylike( - self, tz_naive_fixture, names, op, index_or_series + self, performance_warning, tz_naive_fixture, names, op, index_or_series ): # GH#18849, GH#19744 other_box = index_or_series @@ -2314,7 +2315,7 @@ def test_dti_addsub_offset_arraylike( xbox = get_upcast_box(dti, other) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = op(dti, other) expected = DatetimeIndex( @@ -2325,7 +2326,7 @@ def test_dti_addsub_offset_arraylike( @pytest.mark.parametrize("other_box", [pd.Index, np.array]) def test_dti_addsub_object_arraylike( - self, tz_naive_fixture, box_with_array, other_box + self, performance_warning, tz_naive_fixture, box_with_array, other_box ): tz = tz_naive_fixture @@ -2337,14 +2338,14 @@ def test_dti_addsub_object_arraylike( expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = dtarr + other tm.assert_equal(result, expected) expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = dtarr - other tm.assert_equal(result, expected) @@ -2370,7 +2371,7 @@ def test_shift_months(years, months, unit): tm.assert_index_equal(actual, expected) -def test_dt64arr_addsub_object_dtype_2d(): +def test_dt64arr_addsub_object_dtype_2d(performance_warning): # block-wise DataFrame operations will require operating on 2D # 
DatetimeArray/TimedeltaArray, so check that specifically. dti = date_range("1994-02-13", freq="2W", periods=4) @@ -2379,14 +2380,14 @@ def test_dt64arr_addsub_object_dtype_2d(): other = np.array([[pd.offsets.Day(n)] for n in range(4)]) assert other.shape == dta.shape - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = dta + other - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): expected = (dta[:, 0] + other[:, 0]).reshape(-1, 1) tm.assert_numpy_array_equal(result, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): # Case where we expect to get a TimedeltaArray back result2 = dta - dta.astype(object) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 5535fe8ff928d..18f1993c198df 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -12,7 +12,6 @@ Timestamp, to_offset, ) -from pandas.errors import PerformanceWarning import pandas as pd from pandas import ( @@ -868,7 +867,7 @@ def test_parr_sub_td64array(self, box_with_array, tdi_freq, pi_freq): # operations with array/Index of DateOffset objects @pytest.mark.parametrize("box", [np.array, pd.Index]) - def test_pi_add_offset_array(self, box): + def test_pi_add_offset_array(self, performance_warning, box): # GH#18849 pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")]) offs = box( @@ -879,11 +878,11 @@ def test_pi_add_offset_array(self, box): ) expected = PeriodIndex([Period("2015Q2"), Period("2015Q4")]).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = pi + offs tm.assert_index_equal(res, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res2 = offs + pi 
tm.assert_index_equal(res2, expected) @@ -892,14 +891,14 @@ def test_pi_add_offset_array(self, box): # a PerformanceWarning and _then_ raise a TypeError. msg = r"Input cannot be converted to Period\(freq=Q-DEC\)" with pytest.raises(IncompatibleFrequency, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): pi + unanchored with pytest.raises(IncompatibleFrequency, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): unanchored + pi @pytest.mark.parametrize("box", [np.array, pd.Index]) - def test_pi_sub_offset_array(self, box): + def test_pi_sub_offset_array(self, performance_warning, box): # GH#18824 pi = PeriodIndex([Period("2015Q1"), Period("2016Q2")]) other = box( @@ -912,7 +911,7 @@ def test_pi_sub_offset_array(self, box): expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) expected = expected.astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = pi - other tm.assert_index_equal(res, expected) @@ -922,10 +921,10 @@ def test_pi_sub_offset_array(self, box): # a PerformanceWarning and _then_ raise a TypeError. 
msg = r"Input has different freq=-1M from Period\(freq=Q-DEC\)" with pytest.raises(IncompatibleFrequency, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): pi - anchored with pytest.raises(IncompatibleFrequency, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): anchored - pi def test_pi_add_iadd_int(self, one): @@ -1329,13 +1328,13 @@ def test_parr_add_sub_index(self): expected = pi - pi tm.assert_index_equal(result, expected) - def test_parr_add_sub_object_array(self): + def test_parr_add_sub_object_array(self, performance_warning): pi = period_range("2000-12-31", periods=3, freq="D") parr = pi.array other = np.array([Timedelta(days=1), pd.offsets.Day(2), 3]) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = parr + other expected = PeriodIndex( @@ -1343,7 +1342,7 @@ def test_parr_add_sub_object_array(self): )._data.astype(object) tm.assert_equal(result, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = parr - other expected = PeriodIndex(["2000-12-30"] * 3, freq="D")._data.astype(object) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 3e9508bd2f504..df1a361d1ec65 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -8,10 +8,7 @@ import numpy as np import pytest -from pandas.errors import ( - OutOfBoundsDatetime, - PerformanceWarning, -) +from pandas.errors import OutOfBoundsDatetime import pandas as pd from pandas import ( @@ -577,7 +574,9 @@ def test_tda_add_sub_index(self): expected = tdi - tdi tm.assert_index_equal(result, expected) - def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): + def test_tda_add_dt64_object_array( + self, 
performance_warning, box_with_array, tz_naive_fixture + ): # Result should be cast back to DatetimeArray box = box_with_array @@ -588,7 +587,7 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(tdi, box) other = tm.box_expected(dti, box) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = obj + other.astype(object) tm.assert_equal(result, other.astype(object)) @@ -1282,7 +1281,9 @@ def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): # ------------------------------------------------------------------ # __add__/__sub__ with DateOffsets and arrays of DateOffsets - def test_td64arr_add_sub_offset_index(self, names, box_with_array): + def test_td64arr_add_sub_offset_index( + self, performance_warning, names, box_with_array + ): # GH#18849, GH#19744 box = box_with_array exname = get_expected_name(box, names) @@ -1302,19 +1303,19 @@ def test_td64arr_add_sub_offset_index(self, names, box_with_array): expected = tm.box_expected(expected, box).astype(object, copy=False) expected_sub = tm.box_expected(expected_sub, box).astype(object, copy=False) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res2 = other + tdi tm.assert_equal(res2, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res_sub = tdi - other tm.assert_equal(res_sub, expected_sub) - def test_td64arr_add_sub_offset_array(self, box_with_array): + def test_td64arr_add_sub_offset_array(self, performance_warning, box_with_array): # GH#18849, GH#18824 box = box_with_array tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) @@ -1330,20 +1331,22 @@ def test_td64arr_add_sub_offset_array(self, 
box_with_array): tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = tdi + other tm.assert_equal(res, expected) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res2 = other + tdi tm.assert_equal(res2, expected) expected_sub = tm.box_expected(expected_sub, box_with_array).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res_sub = tdi - other tm.assert_equal(res_sub, expected_sub) - def test_td64arr_with_offset_series(self, names, box_with_array): + def test_td64arr_with_offset_series( + self, performance_warning, names, box_with_array + ): # GH#18849 box = box_with_array box2 = Series if box in [Index, tm.to_array, pd.array] else box @@ -1358,11 +1361,11 @@ def test_td64arr_with_offset_series(self, names, box_with_array): obj = tm.box_expected(tdi, box) expected_add = tm.box_expected(expected_add, box2).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res = obj + other tm.assert_equal(res, expected_add) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res2 = other + obj tm.assert_equal(res2, expected_add) @@ -1371,12 +1374,14 @@ def test_td64arr_with_offset_series(self, names, box_with_array): ) expected_sub = tm.box_expected(expected_sub, box2).astype(object) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): res3 = obj - other tm.assert_equal(res3, expected_sub) @pytest.mark.parametrize("obox", [np.array, Index, Series]) - def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): + def test_td64arr_addsub_anchored_offset_arraylike( + self, performance_warning, 
obox, box_with_array + ): # GH#18824 tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) tdi = tm.box_expected(tdi, box_with_array) @@ -1387,22 +1392,22 @@ def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): # a PerformanceWarning and _then_ raise a TypeError. msg = "has incorrect type|cannot add the type MonthEnd" with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): tdi + anchored with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): anchored + tdi with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): tdi - anchored with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): anchored - tdi # ------------------------------------------------------------------ # Unsorted - def test_td64arr_add_sub_object_array(self, box_with_array): + def test_td64arr_add_sub_object_array(self, performance_warning, box_with_array): box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1411,7 +1416,7 @@ def test_td64arr_add_sub_object_array(self, box_with_array): other = np.array([Timedelta(days=1), offsets.Day(2), Timestamp("2000-01-04")]) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = tdarr + other expected = Index( @@ -1422,10 +1427,10 @@ def test_td64arr_add_sub_object_array(self, box_with_array): msg = "unsupported operand type|cannot subtract a datelike" with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): tdarr - other - with tm.assert_produces_warning(PerformanceWarning): 
+ with tm.assert_produces_warning(performance_warning): result = other - tdarr expected = Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")]) @@ -1806,7 +1811,9 @@ def test_td64arr_floordiv_int(self, box_with_array): # TODO: operations with timedelta-like arrays, numeric arrays, # reversed ops - def test_td64arr_mod_tdscalar(self, box_with_array, three_days): + def test_td64arr_mod_tdscalar( + self, performance_warning, box_with_array, three_days + ): tdi = timedelta_range("1 Day", "9 days") tdarr = tm.box_expected(tdi, box_with_array) @@ -1816,15 +1823,15 @@ def test_td64arr_mod_tdscalar(self, box_with_array, three_days): result = tdarr % three_days tm.assert_equal(result, expected) - warn = None if box_with_array is DataFrame and isinstance(three_days, pd.DateOffset): - warn = PerformanceWarning # TODO: making expected be object here a result of DataFrame.__divmod__ # being defined in a naive way that does not dispatch to the underlying # array's __divmod__ expected = expected.astype(object) + else: + performance_warning = False - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(performance_warning): result = divmod(tdarr, three_days) tm.assert_equal(result[1], expected) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b69fb573987f9..1bec614b9baec 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1005,10 +1005,12 @@ def test_complex_series_frame_alignment( assert res.shape == expected.shape tm.assert_frame_equal(res, expected) - def test_performance_warning_for_poor_alignment(self, engine, parser): + def test_performance_warning_for_poor_alignment( + self, performance_warning, engine, parser + ): df = DataFrame(np.random.default_rng(2).standard_normal((1000, 10))) s = Series(np.random.default_rng(2).standard_normal(10000)) - if engine == "numexpr": + if engine == "numexpr" and performance_warning: seen = PerformanceWarning else: seen = 
False @@ -1030,7 +1032,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): is_python_engine = engine == "python" - if not is_python_engine: + if not is_python_engine and performance_warning: wrn = PerformanceWarning else: wrn = False @@ -1038,7 +1040,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): with tm.assert_produces_warning(wrn) as w: pd.eval("df + s", engine=engine, parser=parser) - if not is_python_engine: + if not is_python_engine and performance_warning: assert len(w) == 1 msg = str(w[0].message) logged = np.log10(s.size - df.shape[1]) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index d8f14383ef114..6a51d42301a6c 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -17,8 +17,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import SparseDtype import pandas._testing as tm @@ -237,7 +235,7 @@ def test_isna(self, data_missing): tm.assert_equal(sarr.isna(), expected) def test_fillna_limit_backfill(self, data_missing): - warns = (PerformanceWarning, FutureWarning) + warns = FutureWarning with tm.assert_produces_warning(warns, check_stacklevel=False): super().test_fillna_limit_backfill(data_missing) @@ -331,8 +329,8 @@ def test_where_series(self, data, na_value): expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) tm.assert_series_equal(result, expected) - def test_searchsorted(self, data_for_sorting, as_series): - with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + def test_searchsorted(self, performance_warning, data_for_sorting, as_series): + with tm.assert_produces_warning(performance_warning, check_stacklevel=False): super().test_searchsorted(data_for_sorting, as_series) def test_shift_0_periods(self, data): diff --git a/pandas/tests/frame/indexing/test_indexing.py 
b/pandas/tests/frame/indexing/test_indexing.py index 0373c15d15272..ca55af1a7402e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -12,7 +12,6 @@ from pandas._libs import iNaT from pandas.errors import ( InvalidIndexError, - PerformanceWarning, SettingWithCopyError, ) @@ -1528,7 +1527,7 @@ def test_iloc_ea_series_indexer_with_na(self): @pytest.mark.parametrize("indexer", [True, (True,)]) @pytest.mark.parametrize("dtype", [bool, "boolean"]) - def test_loc_bool_multiindex(self, dtype, indexer): + def test_loc_bool_multiindex(self, performance_warning, dtype, indexer): # GH#47687 midx = MultiIndex.from_arrays( [ @@ -1538,7 +1537,7 @@ def test_loc_bool_multiindex(self, dtype, indexer): names=["a", "b"], ) df = DataFrame({"c": [1, 2, 3, 4]}, index=midx) - with tm.maybe_produces_warning(PerformanceWarning, isinstance(indexer, tuple)): + with tm.maybe_produces_warning(performance_warning, isinstance(indexer, tuple)): result = df.loc[indexer] expected = DataFrame( {"c": [1, 2]}, index=Index([True, False], name="b", dtype=dtype) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index b9fc5dc195026..3395004ce5df4 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -71,10 +71,10 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self, using_copy_on_write): + def test_insert_item_cache(self, performance_warning, using_copy_on_write): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) ser = df[0] - expected_warning = PerformanceWarning + expected_warning = PerformanceWarning if performance_warning else None with tm.assert_produces_warning(expected_warning): for n in range(100): diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index a3ae3991522c2..d9668ce46c943 100644 --- 
a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( DataFrame, @@ -167,7 +165,7 @@ def test_drop(self): assert return_value is None tm.assert_frame_equal(df, expected) - def test_drop_multiindex_not_lexsorted(self): + def test_drop_multiindex_not_lexsorted(self, performance_warning): # GH#11640 # define the lexsorted version @@ -188,7 +186,7 @@ def test_drop_multiindex_not_lexsorted(self): assert not not_lexsorted_df.columns._is_lexsorted() expected = lexsorted_df.drop("a", axis=1).astype(float) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = not_lexsorted_df.drop("a", axis=1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 22fff2116510a..c5262cf209533 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( Categorical, @@ -353,14 +351,16 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write, warn_copy_on_wri assert pd.isna(Y["g"]["c"]) @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_strange_column_corruption_issue(self, using_copy_on_write): + def test_strange_column_corruption_issue( + self, performance_warning, using_copy_on_write + ): # TODO(wesm): Unclear how exactly this is related to internal matters df = DataFrame(index=[0, 1]) df[0] = np.nan wasCol = {} with tm.assert_produces_warning( - PerformanceWarning, raise_on_extra_warnings=False + performance_warning, raise_on_extra_warnings=False ): for i, dt in enumerate(df.index): for col in range(100, 200): diff --git 
a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 2501427f985a9..c4708a50d3a03 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -6,7 +6,6 @@ import pytest from pandas._libs import lib -from pandas.errors import PerformanceWarning import pandas as pd from pandas import ( @@ -2167,7 +2166,9 @@ def test_unstack_unobserved_keys(self, future_stack): tm.assert_frame_equal(recons, df) @pytest.mark.slow - def test_unstack_number_of_levels_larger_than_int32(self, monkeypatch): + def test_unstack_number_of_levels_larger_than_int32( + self, performance_warning, monkeypatch + ): # GH#20601 # GH 26314: Change ValueError to PerformanceWarning @@ -2184,7 +2185,7 @@ def __init__(self, *args, **kwargs) -> None: index=[np.arange(2**16), np.arange(2**16)], ) msg = "The following operation may generate" - with tm.assert_produces_warning(PerformanceWarning, match=msg): + with tm.assert_produces_warning(performance_warning, match=msg): with pytest.raises(Exception, match="Don't compute final result."): df.unstack() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a06d104e7e44c..d002e3edf19ef 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -6,10 +6,7 @@ import numpy as np import pytest -from pandas.errors import ( - PerformanceWarning, - SpecificationError, -) +from pandas.errors import SpecificationError import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_string_dtype @@ -1507,7 +1504,7 @@ def test_groupby_multiindex_missing_pair(): tm.assert_frame_equal(res, exp) -def test_groupby_multiindex_not_lexsorted(): +def test_groupby_multiindex_not_lexsorted(performance_warning): # GH 11640 # define the lexsorted version @@ -1528,7 +1525,7 @@ def test_groupby_multiindex_not_lexsorted(): assert not not_lexsorted_df.columns._is_lexsorted() expected = 
lexsorted_df.groupby("a").mean() - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): result = not_lexsorted_df.groupby("a").mean() tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/datetimes/methods/test_shift.py b/pandas/tests/indexes/datetimes/methods/test_shift.py index d8bdcc2a17685..375dea01974bb 100644 --- a/pandas/tests/indexes/datetimes/methods/test_shift.py +++ b/pandas/tests/indexes/datetimes/methods/test_shift.py @@ -152,13 +152,13 @@ def test_shift_bday(self, freq, unit): assert shifted[0] == rng[0] assert shifted.freq == rng.freq - def test_shift_bmonth(self, unit): + def test_shift_bmonth(self, performance_warning, unit): rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit) shifted = rng.shift(1, freq=pd.offsets.BDay()) assert shifted[0] == rng[0] + pd.offsets.BDay() rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit) - with tm.assert_produces_warning(pd.errors.PerformanceWarning): + with tm.assert_produces_warning(performance_warning): shifted = rng.shift(1, freq=pd.offsets.CDay()) assert shifted[0] == rng[0] + pd.offsets.CDay() diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 99c8ebb1e57b2..b83680be6a5ce 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( Index, @@ -121,7 +119,7 @@ def test_droplevel_list(): index[:2].droplevel(["one", "four"]) -def test_drop_not_lexsorted(): +def test_drop_not_lexsorted(performance_warning): # GH 12078 # define the lexsorted version of the multi-index @@ -140,7 +138,7 @@ def test_drop_not_lexsorted(): # compare the results tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) - with tm.assert_produces_warning(PerformanceWarning): + with 
tm.assert_produces_warning(performance_warning): tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index f426a3ee42566..bc23788e20186 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -5,10 +5,7 @@ import pytest from pandas._libs import index as libindex -from pandas.errors import ( - InvalidIndexError, - PerformanceWarning, -) +from pandas.errors import InvalidIndexError import pandas as pd from pandas import ( @@ -749,7 +746,7 @@ def test_get_loc_duplicates2(self): assert index.get_loc("D") == slice(0, 3) - def test_get_loc_past_lexsort_depth(self): + def test_get_loc_past_lexsort_depth(self, performance_warning): # GH#30053 idx = MultiIndex( levels=[["a"], [0, 7], [1]], @@ -759,7 +756,7 @@ def test_get_loc_past_lexsort_depth(self): ) key = ("a", 7) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): # PerformanceWarning: indexing past lexsort depth may impact performance result = idx.get_loc(key) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 4a1a6b9c452d5..3d21ee8a57716 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas.errors import ( - PerformanceWarning, - UnsortedIndexError, -) +from pandas.errors import UnsortedIndexError from pandas import ( CategoricalIndex, @@ -135,7 +132,7 @@ def test_unsortedindex(): df.loc(axis=0)["q", :] -def test_unsortedindex_doc_examples(): +def test_unsortedindex_doc_examples(performance_warning): # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex dfm = DataFrame( { @@ -146,7 +143,7 @@ def test_unsortedindex_doc_examples(): ) dfm = dfm.set_index(["jim", "joe"]) - with 
tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): dfm.loc[(1, "z")] msg = r"Key length \(2\) was greater than MultiIndex lexsort depth \(1\)" diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index de7d644698f2c..67b9ddebfb8bf 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas.errors import ( - IndexingError, - PerformanceWarning, -) +from pandas.errors import IndexingError import pandas as pd from pandas import ( @@ -36,7 +33,7 @@ def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_dat df.loc[("bar", "two"), 1] = 7 assert df.loc[("bar", "two"), 1] == 7 - def test_loc_getitem_general(self, any_real_numpy_dtype): + def test_loc_getitem_general(self, performance_warning, any_real_numpy_dtype): # GH#2817 dtype = any_real_numpy_dtype data = { @@ -49,8 +46,7 @@ def test_loc_getitem_general(self, any_real_numpy_dtype): df = df.set_index(keys=["col", "num"]) key = 4.0, 12 - # emits a PerformanceWarning, ok - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): tm.assert_frame_equal(df.loc[key], df.iloc[2:]) # this is ok diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 36cc8316ea5ff..481a77fd03b05 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -2,7 +2,6 @@ import pytest import pandas._libs.index as libindex -from pandas.errors import PerformanceWarning import pandas as pd from pandas import ( @@ -17,7 +16,7 @@ class TestMultiIndexBasic: - def test_multiindex_perf_warn(self): + def test_multiindex_perf_warn(self, performance_warning): df = DataFrame( { "jim": [0, 0, 1, 1], @@ -26,11 +25,11 @@ def 
test_multiindex_perf_warn(self): } ).set_index(["jim", "joe"]) - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): df.loc[(1, "z")] df = df.iloc[[2, 1, 3, 0]] - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): df.loc[(0,)] @pytest.mark.parametrize("offset", [-5, 5]) diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index bc5f046b7fa33..d526697c7574a 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -192,7 +192,7 @@ def test_put_compression_blosc(setup_path): tm.assert_frame_equal(store["c"], df) -def test_put_mixed_type(setup_path): +def test_put_mixed_type(setup_path, performance_warning): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD"), dtype=object), @@ -215,7 +215,7 @@ def test_put_mixed_type(setup_path): with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df") - with tm.assert_produces_warning(pd.errors.PerformanceWarning): + with tm.assert_produces_warning(performance_warning): store.put("df", df) expected = store.get("df") diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 4ba9787a5a6b9..51ee289c8e27a 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -287,14 +287,14 @@ def test_float_index(setup_path): _check_roundtrip(s, tm.assert_series_equal, path=setup_path) -def test_tuple_index(setup_path): +def test_tuple_index(setup_path, performance_warning): # GH #492 col = np.arange(10) idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)] data = np.random.default_rng(2).standard_normal(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) - with tm.assert_produces_warning(pd.errors.PerformanceWarning): + with tm.assert_produces_warning(performance_warning): _check_roundtrip(DF, 
tm.assert_frame_equal, path=setup_path) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index de56ab614dcd4..7719d6d5c93c8 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -103,7 +103,7 @@ def test_iter_empty(setup_path): assert list(store) == [] -def test_repr(setup_path): +def test_repr(setup_path, performance_warning): with ensure_clean_store(setup_path) as store: repr(store) store.info() @@ -138,7 +138,7 @@ def test_repr(setup_path): df.loc[df.index[3:6], ["obj1"]] = np.nan df = df._consolidate() - with tm.assert_produces_warning(pd.errors.PerformanceWarning): + with tm.assert_produces_warning(performance_warning): store["df"] = df # make a random group in hdf space diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 52bb9fa0f151b..fb6472f918d5d 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( DataFrame, @@ -340,7 +338,7 @@ def test_concat_multiindex_(self): ) tm.assert_frame_equal(result_df, expected_df) - def test_concat_with_key_not_unique(self): + def test_concat_with_key_not_unique(self, performance_warning): # GitHub #46519 df1 = DataFrame({"name": [1]}) df2 = DataFrame({"name": [2]}) @@ -348,7 +346,7 @@ def test_concat_with_key_not_unique(self): df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) # the warning is caused by indexing unsorted multi-index with tm.assert_produces_warning( - PerformanceWarning, match="indexing past lexsort depth" + performance_warning, match="indexing past lexsort depth" ): out_a = df_a.loc[("x", 0), :] @@ -356,7 +354,7 @@ def test_concat_with_key_not_unique(self): {"name": [1, 2, 3]}, index=Index([("x", 0), ("y", 0), ("x", 0)]) ) with tm.assert_produces_warning( - PerformanceWarning, 
match="indexing past lexsort depth" + performance_warning, match="indexing past lexsort depth" ): out_b = df_b.loc[("x", 0)] @@ -367,7 +365,7 @@ def test_concat_with_key_not_unique(self): df3 = DataFrame({"name": ["c", "d"]}) df_a = concat([df1, df2, df3], keys=["x", "y", "x"]) with tm.assert_produces_warning( - PerformanceWarning, match="indexing past lexsort depth" + performance_warning, match="indexing past lexsort depth" ): out_a = df_a.loc[("x", 0), :] @@ -380,7 +378,7 @@ def test_concat_with_key_not_unique(self): ).set_index(["a", "b"]) df_b.index.names = [None, None] with tm.assert_produces_warning( - PerformanceWarning, match="indexing past lexsort depth" + performance_warning, match="indexing past lexsort depth" ): out_b = df_b.loc[("x", 0), :] diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index fbc3d2b8a7c35..0f9233913dbd7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -11,8 +11,6 @@ from pandas._config import using_pyarrow_string_dtype -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( Categorical, @@ -2076,7 +2074,9 @@ def test_pivot_string_func_vs_func(self, f, f_numpy, data): tm.assert_frame_equal(result, expected) @pytest.mark.slow - def test_pivot_number_of_levels_larger_than_int32(self, monkeypatch): + def test_pivot_number_of_levels_larger_than_int32( + self, performance_warning, monkeypatch + ): # GH 20601 # GH 26314: Change ValueError to PerformanceWarning class MockUnstacker(reshape_lib._Unstacker): @@ -2092,7 +2092,7 @@ def __init__(self, *args, **kwargs) -> None: ) msg = "The following operation may generate" - with tm.assert_produces_warning(PerformanceWarning, match=msg): + with tm.assert_produces_warning(performance_warning, match=msg): with pytest.raises(Exception, match="Don't compute final result."): df.pivot_table( index="ind1", columns="ind2", values="count", aggfunc="count" diff --git 
a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 9f0994b968a47..1206a111f4b6f 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -4,7 +4,6 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning import pandas.util._test_decorators as td import pandas as pd @@ -388,10 +387,12 @@ def test_replace_mixed_object(): tm.assert_series_equal(result, expected) -def test_replace_unicode(any_string_dtype): +def test_replace_unicode(any_string_dtype, performance_warning): ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype) expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True) tm.assert_series_equal(result, expected) @@ -406,13 +407,15 @@ def test_replace_wrong_repl_type_raises(any_string_dtype, index_or_series, repl, obj.str.replace("a", repl) -def test_replace_callable(any_string_dtype): +def test_replace_callable(any_string_dtype, performance_warning): # GH 15055 ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) # test with callable repl = lambda m: m.group(0).swapcase() - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True) expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -421,7 +424,7 @@ def test_replace_callable(any_string_dtype): @pytest.mark.parametrize( "repl", [lambda: None, lambda m, x: None, lambda m, x, y=None: None] ) -def test_replace_callable_raises(any_string_dtype, 
repl): +def test_replace_callable_raises(any_string_dtype, performance_warning, repl): # GH 15055 values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) @@ -430,36 +433,42 @@ def test_replace_callable_raises(any_string_dtype, repl): r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " r"(?(3)required )positional arguments?" ) + if not using_pyarrow(any_string_dtype): + performance_warning = False with pytest.raises(TypeError, match=msg): - with tm.maybe_produces_warning( - PerformanceWarning, using_pyarrow(any_string_dtype) - ): + with tm.assert_produces_warning(performance_warning): values.str.replace("a", repl, regex=True) -def test_replace_callable_named_groups(any_string_dtype): +def test_replace_callable_named_groups(any_string_dtype, performance_warning): # test regex named groups ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype) pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)" repl = lambda m: m.group("middle").swapcase() - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(pat, repl, regex=True) expected = Series(["bAR", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) -def test_replace_compiled_regex(any_string_dtype): +def test_replace_compiled_regex(any_string_dtype, performance_warning): # GH 15446 ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) # test with compiled regex pat = re.compile(r"BAD_*") - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(pat, "", regex=True) expected = Series(["foobar", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( +
performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(pat, "", n=1, regex=True) expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -477,11 +486,13 @@ def test_replace_compiled_regex_mixed_object(): tm.assert_series_equal(result, expected) -def test_replace_compiled_regex_unicode(any_string_dtype): +def test_replace_compiled_regex_unicode(any_string_dtype, performance_warning): ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype) expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype) pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(pat, ", ", regex=True) tm.assert_series_equal(result, expected) @@ -504,12 +515,14 @@ def test_replace_compiled_regex_raises(any_string_dtype): ser.str.replace(pat, "", case=True, regex=True) -def test_replace_compiled_regex_callable(any_string_dtype): +def test_replace_compiled_regex_callable(any_string_dtype, performance_warning): # test with callable ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) repl = lambda m: m.group(0).swapcase() pat = re.compile("[a-z][A-Z]{2}") - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace(pat, repl, n=2, regex=True) expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -542,7 +555,7 @@ def test_replace_literal_compiled_raises(any_string_dtype): ser.str.replace(pat, "", regex=False) -def test_replace_moar(any_string_dtype): +def test_replace_moar(any_string_dtype, performance_warning): # PR #1179 ser = Series( ["A", "B", "C", "Aaba", "Baca", 
"", np.nan, "CABA", "dog", "cat"], @@ -556,7 +569,9 @@ def test_replace_moar(any_string_dtype): ) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace("A", "YYY", case=False) expected = Series( [ @@ -575,7 +590,9 @@ def test_replace_moar(any_string_dtype): ) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True) expected = Series( [ @@ -595,16 +612,20 @@ def test_replace_moar(any_string_dtype): tm.assert_series_equal(result, expected) -def test_replace_not_case_sensitive_not_regex(any_string_dtype): +def test_replace_not_case_sensitive_not_regex(any_string_dtype, performance_warning): # https://github.com/pandas-dev/pandas/issues/41602 ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace("a", "c", case=False, regex=False) expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.replace("a.", "c.", case=False, regex=False) expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -747,7 +768,7 @@ def test_fullmatch_na_kwarg(any_string_dtype): tm.assert_series_equal(result, expected) -def 
test_fullmatch_case_kwarg(any_string_dtype): +def test_fullmatch_case_kwarg(any_string_dtype, performance_warning): ser = Series(["ab", "AB", "abc", "ABC"], dtype=any_string_dtype) expected_dtype = np.bool_ if any_string_dtype in object_pyarrow_numpy else "boolean" @@ -761,7 +782,9 @@ def test_fullmatch_case_kwarg(any_string_dtype): result = ser.str.fullmatch("ab", case=False) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning(PerformanceWarning, using_pyarrow(any_string_dtype)): + with tm.maybe_produces_warning( + performance_warning, using_pyarrow(any_string_dtype) + ): result = ser.str.fullmatch("ab", flags=re.IGNORECASE) tm.assert_series_equal(result, expected) @@ -932,7 +955,7 @@ def test_translate_mixed_object(): # -------------------------------------------------------------------------------------- -def test_flags_kwarg(any_string_dtype): +def test_flags_kwarg(any_string_dtype, performance_warning): data = { "Dave": "dave@google.com", "Steve": "steve@gmail.com", @@ -948,11 +971,11 @@ def test_flags_kwarg(any_string_dtype): result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) assert result.iloc[0].tolist() == ["dave", "google", "com"] - with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow): + with tm.maybe_produces_warning(performance_warning, use_pyarrow): result = data.str.match(pat, flags=re.IGNORECASE) assert result.iloc[0] - with tm.maybe_produces_warning(PerformanceWarning, use_pyarrow): + with tm.maybe_produces_warning(performance_warning, use_pyarrow): result = data.str.fullmatch(pat, flags=re.IGNORECASE) assert result.iloc[0] diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index b22dc0b330817..a355b947fc540 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -29,7 +29,6 @@ YearBegin, YearEnd, ) -from pandas.errors import PerformanceWarning from pandas import DatetimeIndex import pandas._testing as tm @@ 
-73,7 +72,7 @@ class TestDST: "microseconds", ] - def _test_all_offsets(self, n, **kwds): + def _test_all_offsets(self, n, performance_warning, **kwds): valid_offsets = ( self.valid_date_offsets_plural if n > 1 @@ -81,9 +80,16 @@ def _test_all_offsets(self, n, **kwds): ) for name in valid_offsets: - self._test_offset(offset_name=name, offset_n=n, **kwds) + self._test_offset( + offset_name=name, + offset_n=n, + performance_warning=performance_warning, + **kwds, + ) - def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): + def _test_offset( + self, offset_name, offset_n, tstart, expected_utc_offset, performance_warning + ): offset = DateOffset(**{offset_name: offset_n}) if ( @@ -105,7 +111,7 @@ def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): dti = DatetimeIndex([tstart]) warn_msg = "Non-vectorized DateOffset" with pytest.raises(pytz.AmbiguousTimeError, match=err_msg): - with tm.assert_produces_warning(PerformanceWarning, match=warn_msg): + with tm.assert_produces_warning(performance_warning, match=warn_msg): dti + offset return @@ -149,18 +155,19 @@ def _make_timestamp(self, string, hrs_offset, tz): offset_string = f"-{(hrs_offset * -1):02}00" return Timestamp(string + offset_string).tz_convert(tz) - def test_springforward_plural(self): + def test_springforward_plural(self, performance_warning): # test moving from standard to daylight savings for tz, utc_offsets in self.timezone_utc_offsets.items(): hrs_pre = utc_offsets["utc_offset_standard"] hrs_post = utc_offsets["utc_offset_daylight"] self._test_all_offsets( n=3, + performance_warning=performance_warning, tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=hrs_post, ) - def test_fallback_singular(self): + def test_fallback_singular(self, performance_warning): # in the case of singular offsets, we don't necessarily know which utc # offset the new Timestamp will wind up in (the tz for 1 month may be # different from 1 second) so we 
don't specify an expected_utc_offset @@ -168,15 +175,17 @@ def test_fallback_singular(self): hrs_pre = utc_offsets["utc_offset_standard"] self._test_all_offsets( n=1, + performance_warning=performance_warning, tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), expected_utc_offset=None, ) - def test_springforward_singular(self): + def test_springforward_singular(self, performance_warning): for tz, utc_offsets in self.timezone_utc_offsets.items(): hrs_pre = utc_offsets["utc_offset_standard"] self._test_all_offsets( n=1, + performance_warning=performance_warning, tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=None, ) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 72eff4b6ee479..4db686e378496 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -25,7 +25,6 @@ to_offset, ) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG -from pandas.errors import PerformanceWarning from pandas import ( DataFrame, @@ -500,14 +499,15 @@ def test_add(self, offset_types, tz_naive_fixture, expecteds): assert isinstance(result, Timestamp) assert result == expected_localize - def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): + def test_add_empty_datetimeindex( + self, performance_warning, offset_types, tz_naive_fixture + ): # GH#12724, GH#30336 offset_s = _create_offset(offset_types) dti = DatetimeIndex([], tz=tz_naive_fixture).as_unit("ns") - warn = None - if isinstance( + if not isinstance( offset_s, ( Easter, @@ -523,23 +523,31 @@ def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): ), ): # We don't have an optimized apply_index - warn = PerformanceWarning + performance_warning = False # stacklevel checking is slow, and we have ~800 of variants of this # test, so let's only check the stacklevel in a subset of them check_stacklevel = tz_naive_fixture is None - with 
tm.assert_produces_warning(warn, check_stacklevel=check_stacklevel): + with tm.assert_produces_warning( + performance_warning, check_stacklevel=check_stacklevel + ): result = dti + offset_s tm.assert_index_equal(result, dti) - with tm.assert_produces_warning(warn, check_stacklevel=check_stacklevel): + with tm.assert_produces_warning( + performance_warning, check_stacklevel=check_stacklevel + ): result = offset_s + dti tm.assert_index_equal(result, dti) dta = dti._data - with tm.assert_produces_warning(warn, check_stacklevel=check_stacklevel): + with tm.assert_produces_warning( + performance_warning, check_stacklevel=check_stacklevel + ): result = dta + offset_s tm.assert_equal(result, dta) - with tm.assert_produces_warning(warn, check_stacklevel=check_stacklevel): + with tm.assert_produces_warning( + performance_warning, check_stacklevel=check_stacklevel + ): result = offset_s + dta tm.assert_equal(result, dta) @@ -1118,7 +1126,7 @@ def test_offset_multiplication( tm.assert_series_equal(resultarray, expectedarray) -def test_dateoffset_operations_on_dataframes(): +def test_dateoffset_operations_on_dataframes(performance_warning): # GH 47953 df = DataFrame({"T": [Timestamp("2019-04-30")], "D": [DateOffset(months=1)]}) frameresult1 = df["T"] + 26 * df["D"] @@ -1129,7 +1137,7 @@ def test_dateoffset_operations_on_dataframes(): } ) expecteddate = Timestamp("2021-06-30") - with tm.assert_produces_warning(PerformanceWarning): + with tm.assert_produces_warning(performance_warning): frameresult2 = df2["T"] + 26 * df2["D"] assert frameresult1[0] == expecteddate From f0de9beb6c0f98b3a9f20557ef284bf38e5648f3 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 6 Feb 2024 23:22:39 -0500 Subject: [PATCH 4/4] fixup --- pandas/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 2c89a27780d27..1083f715763b2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1967,7 +1967,7 @@ def using_copy_on_write() -> 
bool: @pytest.fixture(params=[True, False]) -def performance_warning(request) -> bool: +def performance_warning(request) -> Iterator[bool | type[Warning]]: """ Fixture to check if performance warnings are enabled. Either produces ``PerformanceWarning`` if they are enabled, otherwise ``False``.