diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 29f360e050548..6361d22f0ba73 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1238,6 +1238,8 @@ Interval - Bug in :meth:`IntervalIndex.is_overlapping` incorrect output if interval has duplicate left boundaries (:issue:`49581`) - Bug in :meth:`Series.infer_objects` failing to infer :class:`IntervalDtype` for an object series of :class:`Interval` objects (:issue:`50090`) - Bug in :meth:`Series.shift` with :class:`IntervalDtype` and invalid null ``fill_value`` failing to raise ``TypeError`` (:issue:`51258`) +- Bug in :class:`IntervalDtype` where it accepted non-64-bit numeric subtypes, even though :class:`arrays.IntervalArray` can only hold numeric data if it is 64-bit. + Supplying non-64-bit numeric subtypes to :class:`IntervalDtype` now raises a ``TypeError`` (:issue:`45412`) - Indexing diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 3444ad77c2981..57a43dfbf9698 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -863,6 +863,58 @@ def is_int64_dtype(arr_or_dtype) -> bool: return _is_dtype_type(arr_or_dtype, classes(np.int64)) +def is_64bit_real_numeric_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of 64-bit dtype. + + Parameters + ---------- + arr_or_dtype : array-like or dtype + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of 64-bit dtype. 
+ + Examples + -------- + >>> is_64bit_real_numeric_dtype(str) + False + >>> is_64bit_real_numeric_dtype(np.int32) + False + >>> is_64bit_real_numeric_dtype(np.int64) + True + >>> is_64bit_real_numeric_dtype(pd.Int64Dtype()) + True + >>> is_64bit_real_numeric_dtype("Int64") + True + >>> is_64bit_real_numeric_dtype('int8') + False + >>> is_64bit_real_numeric_dtype('Int8') + False + >>> is_64bit_real_numeric_dtype(float) + True + >>> is_64bit_real_numeric_dtype(np.uint64) + True + >>> is_64bit_real_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_64bit_real_numeric_dtype(pd.Index([1, 2.])) # float + True + >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.uint32)) + False + """ + return _is_dtype_type( + arr_or_dtype, classes(np.int64, np.uint64, np.float64) + ) or _is_dtype( + arr_or_dtype, + lambda typ: isinstance(typ, ExtensionDtype) + and typ.type in (np.int64, np.uint64, np.float64), + ) + + def is_datetime64_any_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the datetime64 dtype. 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 33ff6d1eee686..2d67af4a8b5d2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1082,6 +1082,8 @@ class IntervalDtype(PandasExtensionDtype): def __new__(cls, subtype=None, closed: str_type | None = None): from pandas.core.dtypes.common import ( + is_64bit_real_numeric_dtype, + is_any_real_numeric_dtype, is_string_dtype, pandas_dtype, ) @@ -1132,6 +1134,12 @@ def __new__(cls, subtype=None, closed: str_type | None = None): "for IntervalDtype" ) raise TypeError(msg) + elif is_any_real_numeric_dtype(subtype) and not is_64bit_real_numeric_dtype( + subtype + ): + raise TypeError( + f"numeric subtype must be 64-bit numeric dtype, was {subtype}" + ) key = f"{subtype}{closed}" try: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d054fb59d8561..aae22e86ca88e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.base import _registry as registry from pandas.core.dtypes.common import ( + is_64bit_real_numeric_dtype, is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, @@ -16,6 +17,7 @@ is_datetime64tz_dtype, is_dtype_equal, is_interval_dtype, + is_numeric_dtype, is_period_dtype, is_string_dtype, ) @@ -597,6 +599,7 @@ def test_construction_generic(self, subtype): @pytest.mark.parametrize( "subtype", [ + *[x for x in tm.ALL_REAL_DTYPES if not is_64bit_real_numeric_dtype(x)], CategoricalDtype(list("abc"), False), CategoricalDtype(list("wxyz"), True), object, @@ -607,11 +610,14 @@ def test_construction_generic(self, subtype): ], ) def test_construction_not_supported(self, subtype): - # GH 19016 - msg = ( - "category, object, and string subtypes are not supported " - "for IntervalDtype" - ) + # GH19016, GH45412 + if is_numeric_dtype(subtype): + msg = "numeric subtype must be 64-bit numeric dtype, was" + else: + msg = ( + "category, object, and string 
subtypes are not supported " + "for IntervalDtype" + ) with pytest.raises(TypeError, match=msg): IntervalDtype(subtype)