From a86a7ff31513b696b66191264b7678505b322326 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 18 Aug 2020 19:31:01 -0500 Subject: [PATCH 1/9] Fix is_categorical_dtype for Sparse[category] --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/dtypes/common.py | 5 +++-- pandas/tests/dtypes/test_common.py | 9 +++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 42f95d88d74ac..99da1591ef037 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -165,7 +165,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- Bug in :func:`pandas.core.dtypes.common.is_categorical_dtype` where categorical ``SparseArray`` dtypes would return ``False`` (:issue:`35793`) - Datetimelike diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 1e70ff90fcd44..54f3e356388e6 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -558,8 +558,9 @@ def is_categorical_dtype(arr_or_dtype) -> bool: """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object - return arr_or_dtype.name == "category" - + return "category" in arr_or_dtype.name + if hasattr(arr_or_dtype, "dtype"): + return is_categorical_dtype(arr_or_dtype.dtype) if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a6c526fcb008a..951b253c10b91 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -213,6 +213,15 @@ def test_is_categorical_deprecation(): com.is_categorical([1, 2, 3]) +def test_sparse_categorical_is_categorical(): + # https://github.com/pandas-dev/pandas/issues/35793 + s = pd.Series( + ["a", "b", "c"], dtype=pd.SparseDtype(CategoricalDtype(["a", "b", "c"])) + ) + assert com.is_categorical_dtype(s) + assert com.is_categorical_dtype(s.dtype) + + def test_is_datetime64_dtype(): assert not com.is_datetime64_dtype(object) assert not com.is_datetime64_dtype([1, 2, 3]) From 29241d209dbc7299feed3065138157b78196546f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 18 Aug 2020 20:02:38 -0500 Subject: [PATCH 2/9] Fixup --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 99da1591ef037..9fb24c0e18dc7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -165,7 +165,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.core.dtypes.common.is_categorical_dtype` where categorical ``SparseArray`` dtypes would return ``False`` (:issue:`35793`) +- Bug in :func:`pandas.core.dtypes.common.is_categorical_dtype` where sparse categorical dtypes would return ``False`` (:issue:`35793`) - Datetimelike From b52a7eab2340031895985f6c25cc9dba0e013123 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 18 Aug 2020 20:20:42 -0500 Subject: [PATCH 3/9] Fixes --- pandas/core/dtypes/base.py | 2 ++ pandas/core/dtypes/common.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 07c73876954d0..cef2f3aeabd79 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -289,6 +289,8 @@ def is_dtype(cls, dtype: object) -> bool: return False elif isinstance(dtype, cls): return True + elif hasattr(dtype, "subtype") and isinstance(dtype.subtype, cls): + return True if isinstance(dtype, str): try: return cls.construct_from_string(dtype) is not None diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 54f3e356388e6..1d6b636a445f4 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -558,9 +558,7 @@ def is_categorical_dtype(arr_or_dtype) -> bool: """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object - return "category" in arr_or_dtype.name - if hasattr(arr_or_dtype, "dtype"): - return is_categorical_dtype(arr_or_dtype.dtype) + return ("category" == arr_or_dtype.name) or ("category" in arr_or_dtype.name) if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) From 41d58a041e5f59863b10ca98c74ee755d6253ec4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 15 Sep 2020 14:08:28 -0500 Subject: [PATCH 4/9] mypy --- pandas/core/dtypes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 2074ed344692d..1959f7bfdd571 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -287,7 +287,7 @@ def is_dtype(cls, dtype: object) -> bool: return False elif isinstance(dtype, cls): return True - elif hasattr(dtype, "subtype") and isinstance(dtype.subtype, cls): + elif isinstance(getattr(dtype, "subtype", dtype), cls): return True if isinstance(dtype, str): try: From 6c482bc41888fda6d740e0cdd5485e565ddaf8ea Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 15 Sep 2020 14:24:30 -0500 Subject: [PATCH 5/9] Type --- pandas/core/dtypes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 1959f7bfdd571..07c9c85c5830e 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -287,7 +287,7 @@ def is_dtype(cls, dtype: object) -> bool: return False elif isinstance(dtype, cls): return True - elif isinstance(getattr(dtype, "subtype", dtype), cls): + elif hasattr(dtype, "subtype") and isinstance(dtype.subtype, cls): # type: ignore[attr-defined] return True if isinstance(dtype, str): try: From 582e6e4af57657631a71cac272d8d4f34575dae4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 15 Sep 2020 14:26:43 -0500 Subject: [PATCH 6/9] Lint --- pandas/core/dtypes/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 07c9c85c5830e..4d7e78a0f46e8 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -287,7 +287,9 @@ def is_dtype(cls, dtype: object) -> bool: return False elif isinstance(dtype, cls): return True - elif hasattr(dtype, "subtype") and isinstance(dtype.subtype, cls): # type: ignore[attr-defined] + elif hasattr(dtype, "subtype") and isinstance( + dtype.subtype, cls # type: ignore[attr-defined] + ): return True if isinstance(dtype, str): try: From 19b0aae1c7042f432760cd3156cbc73374a85c88 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 15 Sep 2020 18:23:09 -0500 Subject: [PATCH 7/9] Update --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/dtypes/common.py | 4 +++- pandas/tests/dtypes/test_common.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 961924733a20e..c8a2c4ef45023 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -233,7 +233,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.core.dtypes.common.is_categorical_dtype` where sparse categorical dtypes would return ``False`` (:issue:`35793`) +- Bug in :func:`pandas.dtypes.is_categorical_dtype` where sparse categorical dtypes would return ``False`` (:issue:`35793`) - Datetimelike diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 44052bcf5f293..223fe6bd92f12 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -558,7 +558,9 @@ def is_categorical_dtype(arr_or_dtype) -> bool: """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object - return ("category" == arr_or_dtype.name) or ("category" in arr_or_dtype.name) + return ("category" == arr_or_dtype.name) or ( + is_sparse(arr_or_dtype) and arr_or_dtype.subtype == "category" # type: ignore[attr-defined] + ) if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index dda8f494f787f..e36b4ae17ed0e 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -213,7 +213,7 @@ def test_is_categorical_deprecation(): com.is_categorical([1, 2, 3]) -def test_sparse_categorical_is_categorical(): +def test_is_categorical_sparse_categorical(): # https://github.com/pandas-dev/pandas/issues/35793 s = pd.Series( ["a", "b", "c"], dtype=pd.SparseDtype(CategoricalDtype(["a", "b", "c"])) From 5a031aa5b6a787c1a962b89ecb17cc3b53e47cb4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 15 Sep 2020 20:11:47 -0500 Subject: [PATCH 8/9] Fix --- pandas/core/dtypes/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 223fe6bd92f12..bc94d4ae422d4 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -559,7 +559,8 @@ def is_categorical_dtype(arr_or_dtype) -> bool: if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return ("category" == arr_or_dtype.name) or ( - is_sparse(arr_or_dtype) and arr_or_dtype.subtype == "category" # type: ignore[attr-defined] + is_sparse(arr_or_dtype) + and arr_or_dtype.subtype.name == "category" # type: ignore[attr-defined] ) if arr_or_dtype is None: return False From a4388d34afaed1b589966401345731b999cff7d7 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 19 Sep 2020 08:01:09 -0500 Subject: [PATCH 9/9] Update doc/source/whatsnew/v1.2.0.rst Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 88202659c956b..109dbd5839756 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -236,7 +236,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :func:`pandas.dtypes.is_categorical_dtype` where sparse categorical dtypes would return ``False`` (:issue:`35793`) +- Bug in :func:`pandas.api.types.is_categorical_dtype` where sparse categorical dtypes would return ``False`` (:issue:`35793`) - Datetimelike