From 9a298eed4a746a459ba1c6d2b0dfa0ba44b58612 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Jul 2023 12:18:20 -0700 Subject: [PATCH] CLN: is_mixed_type --- pandas/core/frame.py | 29 ++++---------------------- pandas/core/generic.py | 11 +++++----- pandas/core/internals/array_manager.py | 4 ---- pandas/core/series.py | 5 ----- 4 files changed, 9 insertions(+), 40 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ae43a44d68f1c..f90b5c0eedbe8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -961,13 +961,6 @@ def _is_homogeneous_type(self) -> bool: ------- bool - See Also - -------- - Index._is_homogeneous_type : Whether the object has a single - dtype. - MultiIndex._is_homogeneous_type : Whether all the levels of a - MultiIndex have the same dtype. - Examples -------- >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type @@ -983,12 +976,8 @@ def _is_homogeneous_type(self) -> bool: ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type False """ - if isinstance(self._mgr, ArrayManager): - return len({arr.dtype for arr in self._mgr.arrays}) == 1 - if self._mgr.any_extension_types: - return len({block.dtype for block in self._mgr.blocks}) == 1 - else: - return not self._is_mixed_type + # The "<" part of "<=" here is for empty DataFrame cases + return len({arr.dtype for arr in self._mgr.arrays}) <= 1 @property def _can_fast_transpose(self) -> bool: @@ -4958,7 +4947,7 @@ def _reindex_multi( if row_indexer is not None and col_indexer is not None: # Fastpath. By doing two 'take's at once we avoid making an # unnecessary copy. - # We only get here with `not self._is_mixed_type`, which (almost) + # We only get here with `self._can_fast_transpose`, which (almost) # ensures that self.values is cheap. It may be worth making this # condition more specific. indexer = row_indexer, col_indexer @@ -10849,17 +10838,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): if len(frame._get_axis(axis)) == 0: result = self._constructor_sliced(0, index=frame._get_agg_axis(axis)) else: - if frame._is_mixed_type or frame._mgr.any_extension_types: - # the or any_extension_types is really only hit for single- - # column frames with an extension array - result = notna(frame).sum(axis=axis) - else: - # GH13407 - series_counts = notna(frame).sum(axis=axis) - counts = series_counts._values - result = self._constructor_sliced( - counts, index=frame._get_agg_axis(axis), copy=False - ) + result = notna(frame).sum(axis=axis) return result.astype("int64").__finalize__(self, method="count") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f049e9d479b26..ec0f477a7d0ff 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5536,12 +5536,9 @@ def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t: (common.count_not_none(*axes.values()) == self._AXIS_LEN) and method is None and level is None - and not self._is_mixed_type - and not ( - self.ndim == 2 - and len(self.dtypes) == 1 - and isinstance(self.dtypes.iloc[0], ExtensionDtype) - ) + # reindex_multi calls self.values, so we only want to go + # down that path when doing so is cheap. + and self._can_fast_transpose ) def _reindex_multi(self, axes, copy, fill_value): @@ -6266,9 +6263,11 @@ def _consolidate(self): self ) + @final @property def _is_mixed_type(self) -> bool_t: if self._mgr.is_single_block: + # Includes all Series cases return False if self._mgr.any_extension_types: diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index f402c9ced0e19..431de70a25392 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -347,10 +347,6 @@ def _convert(arr): def to_native_types(self, **kwargs) -> Self: return self.apply(to_native_types, **kwargs) - @property - def is_mixed_type(self) -> bool: - return True - @property def any_extension_types(self) -> bool: """Whether any of the blocks in this manager are extension blocks""" diff --git a/pandas/core/series.py b/pandas/core/series.py index e59a4cfc3fcc1..e1b76fe132a11 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1383,7 +1383,6 @@ def _maybe_update_cacher( return cacher = getattr(self, "_cacher", None) if cacher is not None: - assert self.ndim == 1 ref: DataFrame = cacher[1]() # we are trying to reference a dead referent, hence @@ -1407,10 +1406,6 @@ def _maybe_update_cacher( # ---------------------------------------------------------------------- # Unsorted - @property - def _is_mixed_type(self) -> bool: - return False - def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: """ Repeat elements of a Series.