diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 72cc28c1dd66d..b5be0263c492a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1408,11 +1408,12 @@ def is_1d_only_ea_obj(obj: Any) -> bool: from pandas.core.arrays import ( DatetimeArray, ExtensionArray, + PeriodArray, TimedeltaArray, ) return isinstance(obj, ExtensionArray) and not isinstance( - obj, (DatetimeArray, TimedeltaArray) + obj, (DatetimeArray, TimedeltaArray, PeriodArray) ) @@ -1424,7 +1425,9 @@ def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool: # here too. # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype # to exclude ArrowTimestampUSDtype - return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype) + return isinstance(dtype, ExtensionDtype) and not isinstance( + dtype, (DatetimeTZDtype, PeriodDtype) + ) def is_extension_array_dtype(arr_or_dtype) -> bool: diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index fe0b36a8ef4d1..537ae8f2a4320 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -15,6 +15,7 @@ from pandas.core.dtypes.common import ( is_datetime64tz_dtype, + is_period_dtype, pandas_dtype, ) @@ -62,8 +63,9 @@ def make_block( placement = BlockPlacement(placement) ndim = maybe_infer_ndim(values, placement, ndim) - if is_datetime64tz_dtype(values.dtype): + if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype): # GH#41168 ensure we can pass 1D dt64tz values + # More generally, any EA dtype that isn't is_1d_only_ea_dtype values = extract_array(values, extract_numpy=True) values = ensure_block_shape(values, ndim) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1e798a39a2ece..85aa61142dd39 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -58,6 +58,7 @@ CategoricalDtype, ExtensionDtype, PandasDtype, + PeriodDtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -1728,6 +1729,12 @@ class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock values: NDArrayBackedExtensionArray + # error: Signature of "is_extension" incompatible with supertype "Block" + @cache_readonly + def is_extension(self) -> bool: # type: ignore[override] + # i.e. datetime64tz, PeriodDtype + return not isinstance(self.dtype, np.dtype) + @property def is_view(self) -> bool: """return a boolean if I am possibly a view""" @@ -1756,6 +1763,9 @@ def where(self, other, cond) -> list[Block]: try: res_values = arr.T._where(cond, other).T except (ValueError, TypeError): + if isinstance(self.dtype, PeriodDtype): + # TODO: don't special-case + raise blk = self.coerce_to_target_dtype(other) nbs = blk.where(other, cond) return self._maybe_downcast(nbs, "infer") @@ -1949,6 +1959,8 @@ def get_block_type(dtype: DtypeObj): cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock + elif isinstance(dtype, PeriodDtype): + cls = NDArrayBackedExtensionBlock elif isinstance(dtype, ExtensionDtype): # Note: need to be sure PandasArray is unwrapped before we get here cls = ExtensionBlock diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d7aadd9d5bca6..532309dfc40b3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -318,7 +318,7 @@ def ndarray_to_mgr( return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): - # i.e. Datetime64TZ + # i.e. Datetime64TZ, PeriodDtype values = extract_array(values, extract_numpy=True) if copy: values = values.copy() diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index a231e52d4b027..97f6aa3872c81 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1262,6 +1262,9 @@ def test_parr_add_timedeltalike_scalar(self, three_days, box_with_array): ) obj = tm.box_expected(ser, box_with_array) + if box_with_array is pd.DataFrame: + assert (obj.dtypes == "Period[D]").all() + expected = tm.box_expected(expected, box_with_array) result = obj + three_days diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 560299a4a47f5..6066d49b68489 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,5 +1,7 @@ import pytest +from pandas.compat import pa_version_under2p0 + from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd @@ -69,6 +71,9 @@ def test_arrow_array_missing(): assert result.storage.equals(expected) +@pytest.mark.xfail( + pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API" +) def test_arrow_table_roundtrip(): from pandas.core.arrays._arrow_utils import ArrowPeriodType @@ -88,6 +93,9 @@ def test_arrow_table_roundtrip(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API" +) def test_arrow_load_from_zero_chunks(): # GH-41040 @@ -106,6 +114,9 @@ def test_arrow_load_from_zero_chunks(): tm.assert_frame_equal(result, df) +@pytest.mark.xfail( + pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API" +) def test_arrow_table_roundtrip_without_metadata(): arr = PeriodArray([1, 2, 3], freq="H") arr[1] = pd.NaT diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b577bc7e436df..b9b36f828c357 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1275,7 +1275,7 @@ def test_interval_can_hold_element(self, dtype, element): def test_period_can_hold_element_emptylist(self): pi = period_range("2016", periods=3, freq="A") - blk = new_block(pi._data, [1], ndim=2) + blk = new_block(pi._data.reshape(1, 3), [1], ndim=2) assert blk._can_hold_element([]) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 59c7abc4a4cb8..15d41c56c13c1 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,6 +2,8 @@ import numpy as np import pytest +from pandas.compat.pyarrow import pa_version_under2p0 + import pandas as pd import pandas._testing as tm @@ -85,7 +87,11 @@ def test_basic(self): ), } ) - df["periods"] = pd.period_range("2013", freq="M", periods=3) + if not pa_version_under2p0: + # older pyarrow incorrectly uses pandas internal API, so + # constructs invalid Block + df["periods"] = pd.period_range("2013", freq="M", periods=3) + df["timedeltas"] = pd.timedelta_range("1 day", periods=3) df["intervals"] = pd.interval_range(0, 3, 3) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 50d9b75fe9d81..0bd291cea894e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -648,7 +648,15 @@ def test_use_nullable_dtypes(self, engine, request): "object", "datetime64[ns, UTC]", "float", - "period[D]", + pytest.param( + "period[D]", + # Note: I don't know exactly what version the cutoff is; + # On the CI it fails with 1.0.1 + marks=pytest.mark.xfail( + pa_version_under2p0, + reason="pyarrow uses pandas internal API incorrectly", + ), + ), "Float64", "string", ], @@ -887,6 +895,9 @@ def test_pyarrow_backed_string_array(self, pa, string_storage): check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]")) @td.skip_if_no("pyarrow") + @pytest.mark.xfail( + pa_version_under2p0, reason="pyarrow uses pandas internal API incorrectly" + ) def test_additional_extension_types(self, pa): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol + by defining a custom ExtensionType