diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 2824a10de1d47..99ae60859b68c 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -448,6 +448,8 @@ ExtensionArray Other ^^^^^ - Bug in :class:`Index` constructor sometimes silently ignorning a specified ``dtype`` (:issue:`38879`) +- Bug in :func:`pandas.api.types.infer_dtype` not recognizing Series, Index or array with a period dtype (:issue:`23553`) +- Bug in :func:`pandas.api.types.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`) - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6793ea0ab3bc2..3a11e7fbbdf33 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -69,6 +69,7 @@ from pandas._libs cimport util from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX, is_nan from pandas._libs.tslib import array_to_datetime +from pandas._libs.tslibs.period import Period from pandas._libs.missing cimport ( C_NA, @@ -1082,6 +1083,7 @@ _TYPE_MAP = { "timedelta64[ns]": "timedelta64", "m": "timedelta64", "interval": "interval", + Period: "period", } # types only exist on certain platform @@ -1233,8 +1235,8 @@ cdef object _try_infer_map(object dtype): cdef: object val str attr - for attr in ["name", "kind", "base"]: - val = getattr(dtype, attr) + for attr in ["name", "kind", "base", "type"]: + val = getattr(dtype, attr, None) if val in _TYPE_MAP: return _TYPE_MAP[val] return None @@ -1275,6 +1277,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: - time - period - mixed + - unknown-array Raises ------ @@ -1287,6 +1290,9 @@ def infer_dtype(value: object, skipna: bool = True) -> str: specialized - 'mixed-integer-float' are floats and integers - 'mixed-integer' are integers mixed with non-integers + - 'unknown-array' is the catchall for something that *is* an array (has + a dtype attribute), but has a dtype unknown to pandas (e.g. external + extension array) Examples -------- @@ -1355,12 +1361,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str: # e.g. categoricals dtype = value.dtype if not isinstance(dtype, np.dtype): - value = _try_infer_map(value.dtype) - if value is not None: - return value - - # its ndarray-like but we can't handle - raise ValueError(f"cannot infer type for {type(value)}") + inferred = _try_infer_map(value.dtype) + if inferred is not None: + return inferred + return "unknown-array" # Unwrap Series/Index values = np.asarray(value) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 067fff3e0a744..0e6ffa637f1ae 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -202,11 +202,7 @@ def _validate(data): if isinstance(values.dtype, StringDtype): return "string" - try: - inferred_dtype = lib.infer_dtype(values, skipna=True) - except ValueError: - # GH#27571 mostly occurs with ExtensionArray - inferred_dtype = None + inferred_dtype = lib.infer_dtype(values, skipna=True) if inferred_dtype not in allowed_types: raise AttributeError("Can only use .str accessor with string values!") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 415fe0309b073..0f4cef772458f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -891,6 +891,19 @@ def test_infer_dtype_period(self): arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")]) assert lib.infer_dtype(arr, skipna=True) == "period" + @pytest.mark.parametrize("klass", [pd.array, pd.Series, pd.Index]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype_period_array(self, klass, skipna): + # https://github.com/pandas-dev/pandas/issues/23553 + values = klass( + [ + Period("2011-01-01", freq="D"), + Period("2011-01-02", freq="D"), + pd.NaT, + ] + ) + assert lib.infer_dtype(values, skipna=skipna) == "period" + def test_infer_dtype_period_mixed(self): arr = np.array( [Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 5e4d23e91925a..b63af0c22b450 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas.api.types import is_object_dtype, is_string_dtype +from pandas.api.types import infer_dtype, is_object_dtype, is_string_dtype from pandas.tests.extension.base.base import BaseExtensionTests @@ -123,3 +123,11 @@ def test_get_common_dtype(self, dtype): # still testing as good practice to have this working (and it is the # only case we can test in general) assert dtype._get_common_dtype([dtype]) == dtype + + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype(self, data, data_missing, skipna): + # only testing that this works without raising an error + res = infer_dtype(data, skipna=skipna) + assert isinstance(res, str) + res = infer_dtype(data_missing, skipna=skipna) + assert isinstance(res, str) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 16278ec1ccc53..23b1ce250a5e5 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -7,6 +7,7 @@ import pandas as pd import pandas._testing as tm +from pandas.api.types import infer_dtype from pandas.tests.extension import base from pandas.tests.extension.decimal.array import ( DecimalArray, @@ -120,6 +121,13 @@ class TestDtype(BaseDecimal, base.BaseDtypeTests): def test_hashable(self, dtype): pass + @pytest.mark.parametrize("skipna", [True, False]) + def test_infer_dtype(self, data, data_missing, skipna): + # here overriding base test to ensure we fall back to return + # "unknown-array" for an EA pandas doesn't know + assert infer_dtype(data, skipna=skipna) == "unknown-array" + assert infer_dtype(data_missing, skipna=skipna) == "unknown-array" + class TestInterface(BaseDecimal, base.BaseInterfaceTests): pass diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5adb3730a8c86..c72363b088a5c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -938,7 +938,8 @@ def test_unsupported(self, fp): # period df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) - self.check_error_on_write(df, fp, ValueError, "cannot infer type for") + # error from fastparquet -> don't check exact error message + self.check_error_on_write(df, fp, ValueError, None) # mixed df = pd.DataFrame({"a": ["a", 1, 2.0]})