From b41c643e6e19c7f98cb730c3e76302c12d128b6a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Oct 2022 15:58:42 -0700 Subject: [PATCH 1/4] DEPR: Disallow indexing an Index with a float --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/common.py | 16 +++++----------- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/test_indexing.py | 11 ++++------- 5 files changed, 12 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 72f08ec90f5e8..ba78727283dae 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -286,6 +286,7 @@ Removal of prior version deprecations/changes - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) - Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`) - Enforced disallowing indexing a :class:`Series` with a single item list with a slice (e.g. ``ser[[slice(0, 2)]]``). Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) +- Enforced disallowing indexing an :class:`Index` object with a float key which will raise an ``IndexError`` (:issue:`34191`). 
- Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`) - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) diff --git a/pandas/core/common.py b/pandas/core/common.py index 817b889623d99..a72899398874f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -148,15 +148,13 @@ def is_bool_indexer(key: Any) -> bool: return False -def cast_scalar_indexer(val, warn_float: bool = False): +def cast_scalar_indexer(val): """ To avoid numpy DeprecationWarnings, cast float to integer where valid. Parameters ---------- val : scalar - warn_float : bool, default False - If True, issue deprecation warning for a float indexer. Returns ------- @@ -164,14 +162,10 @@ def cast_scalar_indexer(val, warn_float: bool = False): """ # assumes lib.is_scalar(val) if lib.is_float(val) and val.is_integer(): - if warn_float: - warnings.warn( - "Indexing with a float is deprecated, and will raise an IndexError " - "in pandas 2.0. You can manually convert to an integer key instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return int(val) + raise IndexError( + "Indexing with a float is not allowed. 
Manually convert " + f"to {val} to an integer key instead.", + ) return val diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d8300bb29c274..58c3be39de7fb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5221,7 +5221,7 @@ def __getitem__(self, key): if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray - key = com.cast_scalar_indexer(key, warn_float=True) + key = com.cast_scalar_indexer(key) return getitem(key) if isinstance(key, slice): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3b8380a88bb8b..32db99ab08c1f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2031,7 +2031,7 @@ def __reduce__(self): def __getitem__(self, key): if is_scalar(key): - key = com.cast_scalar_indexer(key, warn_float=True) + key = com.cast_scalar_indexer(key) retval = [] for lev, level_codes in zip(self.levels, self.codes): diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 2b7c5745e0c67..e2ca6c5de06b7 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -287,14 +287,11 @@ def test_putmask_with_wrong_mask(self, index): @pytest.mark.parametrize( "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])] ) -def test_getitem_deprecated_float(idx): - # https://github.com/pandas-dev/pandas/issues/34191 +def test_getitem_raises_float(idx): + # https://github.com/pandas-dev/pandas/issues/34191; enforced 2.0 - with tm.assert_produces_warning(FutureWarning): - result = idx[1.0] - - expected = idx[1] - assert result == expected + with pytest.raises(IndexError, match="Indexing with a float is not allowed"): + idx[1.0] @pytest.mark.parametrize( From 0b0bf1ce0bb7e2c1e3c08e7f30b4da83bcd748fe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 1 Nov 2022 16:57:53 -0700 Subject: [PATCH 2/4] 
DEPR: Remove check_less_precise in asserters --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_testing/asserters.py | 159 ------------------ pandas/tests/frame/test_reductions.py | 2 - pandas/tests/util/test_assert_almost_equal.py | 12 +- 4 files changed, 2 insertions(+), 172 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ba78727283dae..9e5ff8b93bb79 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -185,6 +185,7 @@ Removal of prior version deprecations/changes - Removed deprecated :meth:`.Styler.where` (:issue:`49397`) - Removed deprecated :meth:`.Styler.render` (:issue:`49397`) - Removed deprecated argument ``null_color`` in :meth:`.Styler.highlight_null` (:issue:`49397`) +- Removed deprecated argument ``check_less_precise`` in :meth:`.testing.assert_frame_equal`, :meth:`.testing.assert_extension_array_equal`, :meth:`.testing.assert_series_equal`, :meth:`.testing.assert_index_equal` (:issue:`30562`) - Removed deprecated ``null_counts`` argument in :meth:`DataFrame.info`. 
Use ``show_counts`` instead (:issue:`37999`) - Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) - Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 1f690b39e6fb8..d0a95e764472d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -4,18 +4,12 @@ Literal, cast, ) -import warnings import numpy as np -from pandas._libs.lib import ( - NoDefault, - no_default, -) from pandas._libs.missing import is_matching_na from pandas._libs.sparse import SparseIndex import pandas._libs.testing as _testing -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_bool, @@ -64,7 +58,6 @@ def assert_almost_equal( left, right, check_dtype: bool | Literal["equiv"] = "equiv", - check_less_precise: bool | int | NoDefault = no_default, rtol: float = 1.0e-5, atol: float = 1.0e-8, **kwargs, @@ -83,20 +76,6 @@ def assert_almost_equal( Check dtype if both a and b are the same type. If 'equiv' is passed in, then `RangeIndex` and `Int64Index` are also considered equivalent when doing type checking. - check_less_precise : bool or int, default False - Specify comparison precision. 5 digits (False) or 3 digits (True) - after decimal points are compared. If int, then specify the number - of digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - - .. 
deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. rtol : float, default 1e-5 Relative tolerance. @@ -106,16 +85,6 @@ def assert_almost_equal( .. versionadded:: 1.1.0 """ - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - if isinstance(left, Index): assert_index_equal( left, @@ -171,46 +140,6 @@ def assert_almost_equal( ) -def _get_tol_from_less_precise(check_less_precise: bool | int) -> float: - """ - Return the tolerance equivalent to the deprecated `check_less_precise` - parameter. - - Parameters - ---------- - check_less_precise : bool or int - - Returns - ------- - float - Tolerance to be used as relative/absolute tolerance. 
- - Examples - -------- - >>> # Using check_less_precise as a bool: - >>> _get_tol_from_less_precise(False) - 5e-06 - >>> _get_tol_from_less_precise(True) - 0.0005 - >>> # Using check_less_precise as an int representing the decimal - >>> # tolerance intended: - >>> _get_tol_from_less_precise(2) - 0.005 - >>> _get_tol_from_less_precise(8) - 5e-09 - """ - if isinstance(check_less_precise, bool): - if check_less_precise: - # 3-digit tolerance - return 0.5e-3 - else: - # 5-digit tolerance - return 0.5e-5 - else: - # Equivalent to setting checking_less_precise= - return 0.5 * 10**-check_less_precise - - def _check_isinstance(left, right, cls): """ Helper method for our assert_* methods that ensures that @@ -250,7 +179,6 @@ def assert_index_equal( right: Index, exact: bool | str = "equiv", check_names: bool = True, - check_less_precise: bool | int | NoDefault = no_default, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, @@ -271,14 +199,6 @@ def assert_index_equal( Int64Index as well. check_names : bool, default True Whether to check the names attribute. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default True Whether to compare number exactly. check_categorical : bool, default True @@ -333,16 +253,6 @@ def _get_ilevel_values(index, level): filled = take_nd(unique._values, level_codes, fill_value=unique._na_value) return unique._shallow_copy(filled, name=index.names[level]) - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. 
" - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, Index) @@ -775,7 +685,6 @@ def assert_extension_array_equal( right, check_dtype: bool | Literal["equiv"] = True, index_values=None, - check_less_precise=no_default, check_exact: bool = False, rtol: float = 1.0e-5, atol: float = 1.0e-8, @@ -791,14 +700,6 @@ def assert_extension_array_equal( Whether to check if the ExtensionArray dtypes are identical. index_values : numpy.ndarray, default None Optional index (shared by both left and right), used in output. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default False Whether to compare number exactly. rtol : float, default 1e-5 @@ -823,16 +724,6 @@ def assert_extension_array_equal( >>> b, c = a.array, a.array >>> tm.assert_extension_array_equal(b, c) """ - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. 
" - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" if check_dtype: @@ -881,7 +772,6 @@ def assert_series_equal( check_dtype: bool | Literal["equiv"] = True, check_index_type: bool | Literal["equiv"] = "equiv", check_series_type: bool = True, - check_less_precise: bool | int | NoDefault = no_default, check_names: bool = True, check_exact: bool = False, check_datetimelike_compat: bool = False, @@ -910,20 +800,6 @@ def assert_series_equal( are identical. check_series_type : bool, default True Whether to check the Series class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_names : bool, default True Whether to check the Series and Index names attribute. 
check_exact : bool, default False @@ -978,16 +854,6 @@ def assert_series_equal( if not check_index and check_like: raise ValueError("check_like must be False if check_index is False") - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, Series) @@ -1150,7 +1016,6 @@ def assert_frame_equal( check_index_type: bool | Literal["equiv"] = "equiv", check_column_type: bool | Literal["equiv"] = "equiv", check_frame_type: bool = True, - check_less_precise=no_default, check_names: bool = True, by_blocks: bool = False, check_exact: bool = False, @@ -1188,20 +1053,6 @@ def assert_frame_equal( :func:`assert_index_equal`. check_frame_type : bool, default True Whether to check the DataFrame class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - - .. deprecated:: 1.1.0 - Use `rtol` and `atol` instead to define relative/absolute - tolerance, respectively. Similar to :func:`math.isclose`. check_names : bool, default True Whether to check that the `names` attribute for both the `index` and `column` attributes of the DataFrame is identical. 
@@ -1271,16 +1122,6 @@ def assert_frame_equal( """ __tracebackhide__ = True - if check_less_precise is not no_default: - warnings.warn( - "The 'check_less_precise' keyword in testing.assert_*_equal " - "is deprecated and will be removed in a future version. " - "You can stop passing 'check_less_precise' to silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - rtol = atol = _get_tol_from_less_precise(check_less_precise) - # instance validation _check_isinstance(left, right, DataFrame) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 605dfb2551410..d437e4093eaf6 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -228,8 +228,6 @@ def sem(x): check_dates=True, ) - # GH#32571 check_less_precise is needed on apparently-random - # py37-npdev builds and OSX-PY36-min_version builds # mixed types (with upcasting happening) assert_stat_op_calc( "sum", diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py index ab53707771be6..ba52536e246d0 100644 --- a/pandas/tests/util/test_assert_almost_equal.py +++ b/pandas/tests/util/test_assert_almost_equal.py @@ -69,16 +69,6 @@ def _assert_not_almost_equal_both(a, b, **kwargs): _assert_not_almost_equal(b, a, **kwargs) -@pytest.mark.parametrize( - "a,b,check_less_precise", - [(1.1, 1.1, False), (1.1, 1.100001, True), (1.1, 1.1001, 2)], -) -def test_assert_almost_equal_deprecated(a, b, check_less_precise): - # GH#30562 - with tm.assert_produces_warning(FutureWarning): - _assert_almost_equal_both(a, b, check_less_precise=check_less_precise) - - @pytest.mark.parametrize( "a,b", [ @@ -122,7 +112,7 @@ def test_assert_not_almost_equal_numbers(a, b): ], ) def test_assert_almost_equal_numbers_atol(a, b): - # Equivalent to the deprecated check_less_precise=True + # Equivalent to the deprecated check_less_precise=True, enforced in 2.0 _assert_almost_equal_both(a, b, 
rtol=0.5e-3, atol=0.5e-3) From f8416d0c37f650092521697c992b26059d0a1109 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 1 Nov 2022 17:02:02 -0700 Subject: [PATCH 3/4] Revert "DEPR: Disallow indexing an Index with a float" This reverts commit 3781d86a0016c8c3a7bccdcc926ebba3520663ce. --- doc/source/whatsnew/v2.0.0.rst | 1 - pandas/core/common.py | 16 +++++++++++----- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/test_indexing.py | 11 +++++++---- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9e5ff8b93bb79..9b41c4e32d3b0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -287,7 +287,6 @@ Removal of prior version deprecations/changes - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) - Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`) - Enforced disallowing indexing a :class:`Series` with a single item list with a slice (e.g. ``ser[[slice(0, 2)]]``). Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) -- Enforced disallowing indexing an :class:`Index` object with a float key which will raise an ``IndexError`` (:issue:`34191`). 
- Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`) - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) diff --git a/pandas/core/common.py b/pandas/core/common.py index a72899398874f..817b889623d99 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -148,13 +148,15 @@ def is_bool_indexer(key: Any) -> bool: return False -def cast_scalar_indexer(val): +def cast_scalar_indexer(val, warn_float: bool = False): """ To avoid numpy DeprecationWarnings, cast float to integer where valid. Parameters ---------- val : scalar + warn_float : bool, default False + If True, issue deprecation warning for a float indexer. Returns ------- @@ -162,10 +164,14 @@ def cast_scalar_indexer(val): """ # assumes lib.is_scalar(val) if lib.is_float(val) and val.is_integer(): - raise IndexError( - "Indexing with a float is not allowed. Manually convert " - f"to {val} to an integer key instead.", - ) + if warn_float: + warnings.warn( + "Indexing with a float is deprecated, and will raise an IndexError " + "in pandas 2.0. 
You can manually convert to an integer key instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return int(val) return val diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 58c3be39de7fb..d8300bb29c274 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5221,7 +5221,7 @@ def __getitem__(self, key): if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray - key = com.cast_scalar_indexer(key) + key = com.cast_scalar_indexer(key, warn_float=True) return getitem(key) if isinstance(key, slice): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 32db99ab08c1f..3b8380a88bb8b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2031,7 +2031,7 @@ def __reduce__(self): def __getitem__(self, key): if is_scalar(key): - key = com.cast_scalar_indexer(key) + key = com.cast_scalar_indexer(key, warn_float=True) retval = [] for lev, level_codes in zip(self.levels, self.codes): diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index e2ca6c5de06b7..2b7c5745e0c67 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -287,11 +287,14 @@ def test_putmask_with_wrong_mask(self, index): @pytest.mark.parametrize( "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])] ) -def test_getitem_raises_float(idx): - # https://github.com/pandas-dev/pandas/issues/34191; enforced 2.0 +def test_getitem_deprecated_float(idx): + # https://github.com/pandas-dev/pandas/issues/34191 - with pytest.raises(IndexError, match="Indexing with a float is not allowed"): - idx[1.0] + with tm.assert_produces_warning(FutureWarning): + result = idx[1.0] + + expected = idx[1] + assert result == expected @pytest.mark.parametrize( From 535c7491d7426484036fd3b31c1755a48a2ea587 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke 
<10647082+mroeschke@users.noreply.github.com> Date: Tue, 1 Nov 2022 18:02:11 -0700 Subject: [PATCH 4/4] Restore & refactor comment --- pandas/tests/frame/test_reductions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d437e4093eaf6..8d4d705296f35 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -228,6 +228,7 @@ def sem(x): check_dates=True, ) + # GH#32571: rtol needed for flaky CI builds # mixed types (with upcasting happening) assert_stat_op_calc( "sum",