diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 55e8578b2cef4..9287163053cac 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -688,9 +688,9 @@ def describe(self, **kwargs): def value_counts( self, - normalize=False, - sort=True, - ascending=False, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, bins=None, dropna: bool = True, ): @@ -715,7 +715,7 @@ def apply_series_value_counts(): # scalar bins cannot be done at top level # in a backward compatible way return apply_series_value_counts() - elif is_categorical_dtype(val): + elif is_categorical_dtype(val.dtype): # GH38672 return apply_series_value_counts() @@ -807,44 +807,36 @@ def apply_series_value_counts(): sorter = np.lexsort((out if ascending else -out, cat)) out, codes[-1] = out[sorter], codes[-1][sorter] - if bins is None: - mi = MultiIndex( - levels=levels, codes=codes, names=names, verify_integrity=False - ) - - if is_integer_dtype(out): - out = ensure_int64(out) - return self.obj._constructor(out, index=mi, name=self._selection_name) - - # for compat. with libgroupby.value_counts need to ensure every - # bin is present at every index level, null filled with zeros - diff = np.zeros(len(out), dtype="bool") - for level_codes in codes[:-1]: - diff |= np.r_[True, level_codes[1:] != level_codes[:-1]] + if bins is not None: + # for compat. with libgroupby.value_counts need to ensure every + # bin is present at every index level, null filled with zeros + diff = np.zeros(len(out), dtype="bool") + for level_codes in codes[:-1]: + diff |= np.r_[True, level_codes[1:] != level_codes[:-1]] - ncat, nbin = diff.sum(), len(levels[-1]) + ncat, nbin = diff.sum(), len(levels[-1]) - left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] + left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)] - right = [diff.cumsum() - 1, codes[-1]] + right = [diff.cumsum() - 1, codes[-1]] - _, idx = get_join_indexers(left, right, sort=False, how="left") - out = np.where(idx != -1, out[idx], 0) + _, idx = get_join_indexers(left, right, sort=False, how="left") + out = np.where(idx != -1, out[idx], 0) - if sort: - sorter = np.lexsort((out if ascending else -out, left[0])) - out, left[-1] = out[sorter], left[-1][sorter] + if sort: + sorter = np.lexsort((out if ascending else -out, left[0])) + out, left[-1] = out[sorter], left[-1][sorter] - # build the multi-index w/ full levels - def build_codes(lev_codes: np.ndarray) -> np.ndarray: - return np.repeat(lev_codes[diff], nbin) + # build the multi-index w/ full levels + def build_codes(lev_codes: np.ndarray) -> np.ndarray: + return np.repeat(lev_codes[diff], nbin) - codes = [build_codes(lev_codes) for lev_codes in codes[:-1]] - codes.append(left[-1]) + codes = [build_codes(lev_codes) for lev_codes in codes[:-1]] + codes.append(left[-1]) mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False) - if is_integer_dtype(out): + if is_integer_dtype(out.dtype): out = ensure_int64(out) return self.obj._constructor(out, index=mi, name=self._selection_name) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c20a9b7ad2210..1105c1bd1d782 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1837,7 +1837,7 @@ def first(x: Series): return obj.apply(first, axis=axis) elif isinstance(obj, Series): return first(obj) - else: + else: # pragma: no cover raise TypeError(type(obj)) return self._agg_general( @@ -1862,7 +1862,7 @@ def last(x: Series): return obj.apply(last, axis=axis) elif isinstance(obj, Series): return last(obj) - else: + else: # pragma: no cover raise TypeError(type(obj)) return self._agg_general( @@ -3271,7 +3271,7 @@ def get_groupby( from pandas.core.groupby.generic import DataFrameGroupBy klass = DataFrameGroupBy - else: + else: # pragma: no cover raise TypeError(f"invalid type: {obj}") return klass( diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b88f2b0200768..46b47bc29d8a6 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -276,11 +276,11 @@ def get_out_dtype(self, dtype: np.dtype) -> np.dtype: @overload def _get_result_dtype(self, dtype: np.dtype) -> np.dtype: - ... + ... # pragma: no cover @overload def _get_result_dtype(self, dtype: ExtensionDtype) -> ExtensionDtype: - ... + ... # pragma: no cover def _get_result_dtype(self, dtype: DtypeObj) -> DtypeObj: """