diff --git a/pandas/_testing.py b/pandas/_testing.py index eb4eb86c78b2d..74c4c661b4b83 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1519,11 +1519,13 @@ def box_expected(expected, box_cls, transpose=True): def to_array(obj): # temporary implementation until we get pd.array in place - if is_period_dtype(obj): + dtype = getattr(obj, "dtype", None) + + if is_period_dtype(dtype): return period_array(obj) - elif is_datetime64_dtype(obj) or is_datetime64tz_dtype(obj): + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): return DatetimeArray._from_sequence(obj) - elif is_timedelta64_dtype(obj): + elif is_timedelta64_dtype(dtype): return TimedeltaArray._from_sequence(obj) else: return np.array(obj) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ce22c3f5416fa..449d9ad8301e4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,7 +11,7 @@ from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT -from pandas._typing import AnyArrayLike +from pandas._typing import AnyArrayLike, DtypeObj from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( @@ -126,20 +126,21 @@ def _ensure_data(values, dtype=None): return ensure_object(values), "object" # datetimelike - if needs_i8_conversion(values) or needs_i8_conversion(dtype): - if is_period_dtype(values) or is_period_dtype(dtype): + vals_dtype = getattr(values, "dtype", None) + if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype): + if is_period_dtype(vals_dtype) or is_period_dtype(dtype): from pandas import PeriodIndex values = PeriodIndex(values) dtype = values.dtype - elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype): from pandas import TimedeltaIndex values = TimedeltaIndex(values) dtype = values.dtype else: # Datetime - if values.ndim > 1 and is_datetime64_ns_dtype(values): + if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype): # Avoid calling the DatetimeIndex constructor as it is 1D only # Note: this is reached by DataFrame.rank calls GH#27027 # TODO(EA2D): special case not needed with 2D EAs @@ -154,7 +155,7 @@ def _ensure_data(values, dtype=None): return values.asi8, dtype - elif is_categorical_dtype(values) and ( + elif is_categorical_dtype(vals_dtype) and ( is_categorical_dtype(dtype) or dtype is None ): values = values.codes @@ -1080,7 +1081,7 @@ def nsmallest(self): return self.compute("nsmallest") @staticmethod - def is_valid_dtype_n_method(dtype) -> bool: + def is_valid_dtype_n_method(dtype: DtypeObj) -> bool: """ Helper function to determine if dtype is valid for nsmallest/nlargest methods @@ -1863,7 +1864,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): is_timedelta = False is_bool = False - if needs_i8_conversion(arr): + if needs_i8_conversion(arr.dtype): dtype = np.float64 arr = arr.view("i8") na = iNaT diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index de401368d55d7..de12d96556263 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1424,7 +1424,7 @@ def _internal_get_values(self): Index if datetime / periods. """ # if we are a datetime and period index, return Index to keep metadata - if needs_i8_conversion(self.categories): + if needs_i8_conversion(self.categories.dtype): return self.categories.take(self._codes, fill_value=np.nan) elif is_integer_dtype(self.categories) and -1 in self._codes: return self.categories.astype("object").take(self._codes, fill_value=np.nan) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e07e2da164cac..25ac3e445822e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -782,7 +782,7 @@ def _validate_searchsorted_value(self, value): elif is_list_like(value) and not isinstance(value, type(self)): value = array(value) - if not type(self)._is_recognized_dtype(value): + if not type(self)._is_recognized_dtype(value.dtype): raise TypeError( "searchsorted requires compatible dtype or scalar, " f"not {type(value).__name__}" @@ -806,7 +806,7 @@ def _validate_setitem_value(self, value): except ValueError: pass - if not type(self)._is_recognized_dtype(value): + if not type(self)._is_recognized_dtype(value.dtype): raise TypeError( "setitem requires compatible dtype or scalar, " f"not {type(value).__name__}" @@ -1024,7 +1024,7 @@ def fillna(self, value=None, method=None, limit=None): func = missing.backfill_1d values = self._data - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): # For PeriodArray self._data is i8, which gets copied # by `func`. Otherwise we need to make a copy manually # to avoid modifying `self` in-place. @@ -1109,10 +1109,7 @@ def _validate_frequency(cls, index, freq, **kwargs): freq : DateOffset The frequency to validate """ - if is_period_dtype(cls): - # Frequency validation is not meaningful for Period Array/Index - return None - + # TODO: this is not applicable to PeriodArray, move to correct Mixin inferred = index.inferred_freq if index.size == 0 or inferred == freq.freqstr: return None @@ -1253,7 +1250,7 @@ def _add_nat(self): """ Add pd.NaT to self """ - if is_period_dtype(self): + if is_period_dtype(self.dtype): raise TypeError( f"Cannot add {type(self).__name__} and {type(NaT).__name__}" ) @@ -1293,7 +1290,7 @@ def _sub_period_array(self, other): result : np.ndarray[object] Array of DateOffset objects; nulls represented by NaT. """ - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): raise TypeError( f"cannot subtract {other.dtype}-dtype from {type(self).__name__}" ) @@ -1398,7 +1395,7 @@ def __add__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) result = self._time_shift(other) @@ -1413,7 +1410,7 @@ def __add__(self, other): # DatetimeIndex, ndarray[datetime64] return self._add_datetime_arraylike(other) elif is_integer_dtype(other): - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) result = self._addsub_int_array(other, operator.add) else: @@ -1437,6 +1434,8 @@ def __radd__(self, other): @unpack_zerodim_and_defer("__sub__") def __sub__(self, other): + other_dtype = getattr(other, "dtype", None) + # scalar others if other is NaT: result = self._sub_nat() @@ -1450,7 +1449,7 @@ def __sub__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) result = self._time_shift(-other) @@ -1467,11 +1466,11 @@ def __sub__(self, other): elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datetime_arraylike(other) - elif is_period_dtype(other): + elif is_period_dtype(other_dtype): # PeriodIndex result = self._sub_period_array(other) - elif is_integer_dtype(other): - if not is_period_dtype(self): + elif is_integer_dtype(other_dtype): + if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) result = self._addsub_int_array(other, operator.sub) else: @@ -1520,7 +1519,7 @@ def __iadd__(self, other): result = self + other self[:] = result[:] - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem self._freq = result._freq return self @@ -1529,7 +1528,7 @@ def __isub__(self, other): result = self - other self[:] = result[:] - if not is_period_dtype(self): + if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem self._freq = result._freq return self @@ -1621,7 +1620,7 @@ def mean(self, skipna=True): ----- mean is only defined for Datetime and Timedelta dtypes, not for Period. """ - if is_period_dtype(self): + if is_period_dtype(self.dtype): # See discussion in GH#24757 raise TypeError( f"mean is not implemented for {type(self).__name__} since the " diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1460a2e762771..88ee6d010e52a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -828,9 +828,11 @@ def period_array( ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] """ - if is_datetime64_dtype(data): + data_dtype = getattr(data, "dtype", None) + + if is_datetime64_dtype(data_dtype): return PeriodArray._from_datetime64(data, freq) - if is_period_dtype(data): + if is_period_dtype(data_dtype): return PeriodArray(data, freq) # other iterable of some kind diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 87ff9a78909dd..048fd7adf55b1 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -424,7 +424,7 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: return True # NaNs can occur in float and complex arrays. - if is_float_dtype(left) or is_complex_dtype(left): + if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype): # empty if not (np.prod(left.shape) and np.prod(right.shape)): @@ -435,7 +435,7 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: # GH#29553 avoid numpy deprecation warning return False - elif needs_i8_conversion(left) or needs_i8_conversion(right): + elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): # datetime64, timedelta64, Period if not is_dtype_equal(left.dtype, right.dtype): return False @@ -460,7 +460,7 @@ def _infer_fill_value(val): if not is_list_like(val): val = [val] val = np.array(val, copy=False) - if needs_i8_conversion(val): + if needs_i8_conversion(val.dtype): return np.array("NaT", dtype=val.dtype) elif is_object_dtype(val.dtype): dtype = lib.infer_dtype(ensure_object(val), skipna=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1bff17b40433d..a46b9c8d1661c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5916,7 +5916,7 @@ def extract_values(arr): if isinstance(arr, (ABCIndexClass, ABCSeries)): arr = arr._values - if needs_i8_conversion(arr): + if needs_i8_conversion(arr.dtype): if is_extension_array_dtype(arr.dtype): arr = arr.asi8 else: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 90c9f4562515a..db2264db438f4 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -49,7 +49,7 @@ def _get_values(self): elif is_timedelta64_dtype(data.dtype): return TimedeltaIndex(data, copy=False, name=self.name) - elif is_period_dtype(data): + elif is_period_dtype(data.dtype): return PeriodArray(data, copy=False) raise TypeError( @@ -449,7 +449,7 @@ def __new__(cls, data: "Series"): return DatetimeProperties(data, orig) elif is_timedelta64_dtype(data.dtype): return TimedeltaProperties(data, orig) - elif is_period_dtype(data): + elif is_period_dtype(data.dtype): return PeriodProperties(data, orig) raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f18f69149658..e26dc5b9e4fb3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -118,7 +118,7 @@ def __array_wrap__(self, result, context=None): return result attrs = self._get_attributes_dict() - if not is_period_dtype(self) and attrs["freq"]: + if not is_period_dtype(self.dtype) and attrs["freq"]: # no need to infer if freq is None attrs["freq"] = "infer" return Index(result, **attrs) @@ -542,7 +542,7 @@ def delete(self, loc): new_i8s = np.delete(self.asi8, loc) freq = None - if is_period_dtype(self): + if is_period_dtype(self.dtype): freq = self.freq elif is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index afca4ca86bd3f..39df7ae3c9b60 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2673,13 +2673,13 @@ def get_block_type(values, dtype=None): elif is_categorical_dtype(values.dtype): cls = CategoricalBlock elif issubclass(vtype, np.datetime64): - assert not is_datetime64tz_dtype(values) + assert not is_datetime64tz_dtype(values.dtype) cls = DatetimeBlock - elif is_datetime64tz_dtype(values): + elif is_datetime64tz_dtype(values.dtype): cls = DatetimeTZBlock elif is_interval_dtype(dtype) or is_period_dtype(dtype): cls = ObjectValuesExtensionBlock - elif is_extension_array_dtype(values): + elif is_extension_array_dtype(values.dtype): cls = ExtensionBlock elif issubclass(vtype, np.floating): cls = FloatBlock diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7b5399ca85e79..79bbef5fa5505 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -266,7 +266,7 @@ def interpolate_1d( if method in ("values", "index"): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 - if needs_i8_conversion(inds.dtype.type): + if needs_i8_conversion(inds.dtype): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 32b05872ded3f..49920815c3439 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -287,7 +287,7 @@ def _get_values( dtype = values.dtype - if needs_i8_conversion(values): + if needs_i8_conversion(values.dtype): # changing timedelta64/datetime64 to int64 needs to happen after # finding `mask` above values = np.asarray(values.view("i8")) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 882e3e0a649cc..133fba0246497 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -232,10 +232,10 @@ def get_new_values(self, values, fill_value=None): # we need to convert to a basic dtype # and possibly coerce an input to our output dtype # e.g. ints -> floats - if needs_i8_conversion(values): + if needs_i8_conversion(values.dtype): sorted_values = sorted_values.view("i8") new_values = new_values.view("i8") - elif is_bool_dtype(values): + elif is_bool_dtype(values.dtype): sorted_values = sorted_values.astype("object") new_values = new_values.astype("object") else: @@ -253,7 +253,7 @@ def get_new_values(self, values, fill_value=None): ) # reconstruct dtype if needed - if needs_i8_conversion(values): + if needs_i8_conversion(values.dtype): new_values = new_values.view(values.dtype) return new_values, new_mask diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index eb7f15c78b671..ac6f9ff372601 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -306,7 +306,7 @@ def __init__( if len(timedeltas): obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) # Convert PeriodIndex to datetimes before serializing - if is_period_dtype(obj.index): + if is_period_dtype(obj.index.dtype): obj.index = obj.index.to_timestamp() # exclude index from obj if index=False diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 50686594a7576..239ff6241aab0 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -105,22 +105,16 @@ def convert_pandas_type_to_json_field(arr): name = arr.name field = {"name": name, "type": as_json_table_type(dtype)} - if is_categorical_dtype(arr): - if hasattr(arr, "categories"): - cats = arr.categories - ordered = arr.ordered - else: - cats = arr.cat.categories - ordered = arr.cat.ordered + if is_categorical_dtype(dtype): + cats = dtype.categories + ordered = dtype.ordered + field["constraints"] = {"enum": list(cats)} field["ordered"] = ordered - elif is_period_dtype(arr): - field["freq"] = arr.freqstr - elif is_datetime64tz_dtype(arr): - if hasattr(arr, "dt"): - field["tz"] = arr.dt.tz.zone - else: - field["tz"] = arr.tz.zone + elif is_period_dtype(dtype): + field["freq"] = dtype.freq.freqstr + elif is_datetime64tz_dtype(dtype): + field["tz"] = dtype.tz.zone return field diff --git a/pandas/tests/base/test_fillna.py b/pandas/tests/base/test_fillna.py index 5e50a9e2d1c7f..630ecdacd91d1 100644 --- a/pandas/tests/base/test_fillna.py +++ b/pandas/tests/base/test_fillna.py @@ -50,7 +50,7 @@ def test_fillna_null(null_obj, index_or_series_obj): values = obj.values fill_value = values[0] expected = values.copy() - if needs_i8_conversion(obj): + if needs_i8_conversion(obj.dtype): values[0:2] = iNaT expected[0:2] = fill_value else: diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index c6225c9b5ca64..eca402af252f8 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -43,7 +43,7 @@ def test_unique_null(null_obj, index_or_series_obj): pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values - if needs_i8_conversion(obj): + if needs_i8_conversion(obj.dtype): values[0:2] = iNaT else: values[0:2] = null_obj @@ -89,7 +89,7 @@ def test_nunique_null(null_obj, index_or_series_obj): pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values - if needs_i8_conversion(obj): + if needs_i8_conversion(obj.dtype): values[0:2] = iNaT else: values[0:2] = null_obj diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index d45feaff68dde..2514b11613ef6 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -55,7 +55,7 @@ def test_value_counts_null(null_obj, index_or_series_obj): pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values - if needs_i8_conversion(obj): + if needs_i8_conversion(obj.dtype): values[0:2] = iNaT else: values[0:2] = null_obj diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 01d72670f37aa..a08001e042f36 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -215,10 +215,10 @@ def test_get_unique_index(self, indices): if not indices._can_hold_na: pytest.skip("Skip na-check if index cannot hold na") - if is_period_dtype(indices): + if is_period_dtype(indices.dtype): vals = indices[[0] * 5]._data vals[0] = pd.NaT - elif needs_i8_conversion(indices): + elif needs_i8_conversion(indices.dtype): vals = indices.asi8[[0] * 5] vals[0] = iNaT else: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 12320cd52cec8..abd4bf288bd9e 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -272,12 +272,15 @@ def infer_freq(index, warn: bool = True) -> Optional[str]: index = values inferer: _FrequencyInferer - if is_period_dtype(index): + + if not hasattr(index, "dtype"): + pass + elif is_period_dtype(index.dtype): raise TypeError( "PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq." ) - elif is_timedelta64_dtype(index): + elif is_timedelta64_dtype(index.dtype): # Allow TimedeltaIndex and TimedeltaArray inferer = _TimedeltaFrequencyInferer(index, warn=warn) return inferer.get_freq()