Skip to content

BUG: non-64-bit numeric dtypes should raise in IntervalDtype constructor #51322

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,8 @@ Interval
- Bug in :meth:`IntervalIndex.is_overlapping` incorrect output if interval has duplicate left boundaries (:issue:`49581`)
- Bug in :meth:`Series.infer_objects` failing to infer :class:`IntervalDtype` for an object series of :class:`Interval` objects (:issue:`50090`)
- Bug in :meth:`Series.shift` with :class:`IntervalDtype` and invalid null ``fill_value`` failing to raise ``TypeError`` (:issue:`51258`)
- Bug in :class:`IntervalDtype` where it accepted non-64-bit numeric subtypes, even though :class:`arrays.IntervalArray` can only hold numeric data if it is 64-bit.
  Supplying non-64-bit numeric subtypes to :class:`IntervalDtype` now raises a ``TypeError`` (:issue:`45412`)
-

Indexing
Expand Down
52 changes: 52 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,58 @@ def is_int64_dtype(arr_or_dtype) -> bool:
return _is_dtype_type(arr_or_dtype, classes(np.int64))


def is_64bit_real_numeric_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is a 64-bit real numeric dtype.

    The accepted scalar types are ``np.int64``, ``np.uint64`` and
    ``np.float64``. An extension dtype also qualifies when its ``type``
    attribute is one of those three scalar types.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    bool
        Whether or not the array or dtype is a 64-bit real numeric dtype.

    Examples
    --------
    >>> is_64bit_real_numeric_dtype(str)
    False
    >>> is_64bit_real_numeric_dtype(np.int32)
    False
    >>> is_64bit_real_numeric_dtype(np.int64)
    True
    >>> is_64bit_real_numeric_dtype(pd.Int64Dtype())
    True
    >>> is_64bit_real_numeric_dtype("Int64")
    True
    >>> is_64bit_real_numeric_dtype('int8')
    False
    >>> is_64bit_real_numeric_dtype('Int8')
    False
    >>> is_64bit_real_numeric_dtype(float)
    True
    >>> is_64bit_real_numeric_dtype(np.uint64)
    True
    >>> is_64bit_real_numeric_dtype(np.array(['a', 'b']))
    False
    >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.int64))
    True
    >>> is_64bit_real_numeric_dtype(pd.Index([1, 2.]))  # float
    True
    >>> is_64bit_real_numeric_dtype(np.array([1, 2], dtype=np.uint32))
    False
    """
    # The three scalar types that count as "64-bit real numeric".
    accepted_scalars = (np.int64, np.uint64, np.float64)

    def _is_64bit_extension(dtype) -> bool:
        # Extension dtypes are matched through the scalar type they expose.
        return isinstance(dtype, ExtensionDtype) and dtype.type in accepted_scalars

    # First test the scalar type directly; only if that fails, fall back to
    # the extension-dtype predicate.
    matches_scalar_type = _is_dtype_type(arr_or_dtype, classes(*accepted_scalars))
    return matches_scalar_type or _is_dtype(arr_or_dtype, _is_64bit_extension)


def is_datetime64_any_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of the datetime64 dtype.
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,6 +1082,8 @@ class IntervalDtype(PandasExtensionDtype):

def __new__(cls, subtype=None, closed: str_type | None = None):
from pandas.core.dtypes.common import (
is_64bit_real_numeric_dtype,
is_any_real_numeric_dtype,
is_string_dtype,
pandas_dtype,
)
Expand Down Expand Up @@ -1132,6 +1134,12 @@ def __new__(cls, subtype=None, closed: str_type | None = None):
"for IntervalDtype"
)
raise TypeError(msg)
elif is_any_real_numeric_dtype(subtype) and not is_64bit_real_numeric_dtype(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this allow through e.g. Int64, which we don't want? This seems a lot heavier than just checking itemsize.

subtype
):
raise TypeError(
f"numeric subtype must be 64-bit numeric dtype, was {subtype}"
)

key = f"{subtype}{closed}"
try:
Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pandas.core.dtypes.base import _registry as registry
from pandas.core.dtypes.common import (
is_64bit_real_numeric_dtype,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_any_dtype,
Expand All @@ -16,6 +17,7 @@
is_datetime64tz_dtype,
is_dtype_equal,
is_interval_dtype,
is_numeric_dtype,
is_period_dtype,
is_string_dtype,
)
Expand Down Expand Up @@ -597,6 +599,7 @@ def test_construction_generic(self, subtype):
@pytest.mark.parametrize(
"subtype",
[
*[x for x in tm.ALL_REAL_DTYPES if not is_64bit_real_numeric_dtype(x)],
CategoricalDtype(list("abc"), False),
CategoricalDtype(list("wxyz"), True),
object,
Expand All @@ -607,11 +610,14 @@ def test_construction_generic(self, subtype):
],
)
def test_construction_not_supported(self, subtype):
# GH 19016
msg = (
"category, object, and string subtypes are not supported "
"for IntervalDtype"
)
# GH19016, GH45412
if is_numeric_dtype(subtype):
msg = "numeric subtype must be 64-bit numeric dtype, was"
else:
msg = (
"category, object, and string subtypes are not supported "
"for IntervalDtype"
)
with pytest.raises(TypeError, match=msg):
IntervalDtype(subtype)

Expand Down