From 5420b624d449033bc5362334a9238eaa65bd3677 Mon Sep 17 00:00:00 2001 From: codamuse Date: Sun, 13 Nov 2022 23:00:27 -0500 Subject: [PATCH 1/6] remove MultiIndex _get_grouper_for_level and simplify --- pandas/core/groupby/grouper.py | 20 ++------------------ pandas/core/indexes/base.py | 23 ++++++----------------- pandas/core/indexes/multi.py | 26 -------------------------- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 688dcb44c31f3..3ed198f9278cb 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -480,11 +480,7 @@ def __init__( index_level = index.get_level_values(ilevel) else: index_level = index - ( - self.grouping_vector, # Index - self._codes, - self._group_index, - ) = index_level._get_grouper_for_level(mapper, dropna=dropna) + self.grouping_vector = index_level._get_grouper_for_level(mapper) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes @@ -600,10 +596,6 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: @property def codes(self) -> npt.NDArray[np.signedinteger]: - if self._codes is not None: - # _codes is set in __init__ for MultiIndex cases - return self._codes - return self._codes_and_uniques[0] @cache_readonly @@ -612,11 +604,7 @@ def group_arraylike(self) -> ArrayLike: Analogous to result_index, but holding an ArrayLike to ensure we can retain ExtensionDtypes. """ - if self._group_index is not None: - # _group_index is set in __init__ for MultiIndex cases - return self._group_index._values - - elif self._all_grouper is not None: + if self._all_grouper is not None: # retain dtype for categories, including unobserved ones return self.result_index._values @@ -636,10 +624,6 @@ def result_index(self) -> Index: @cache_readonly def group_index(self) -> Index: - if self._group_index is not None: - # _group_index is set in __init__ for MultiIndex cases - return self._group_index - uniques = self._codes_and_uniques[1] return Index._with_infer(uniques, name=self.name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 27672c82fdf15..aa3fb538e23fc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2104,41 +2104,30 @@ def _drop_level_numbers(self, levnums: list[int]): verify_integrity=False, ) + @final def _get_grouper_for_level( self, - mapper, - *, - level=None, - dropna: bool = True, - ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]: + mapper + ) -> Index: """ - Get index grouper corresponding to an index level + Get index grouper from a mapping function Parameters ---------- mapper: Group mapping function or None Function mapping index values to groups - level : int or None - Index level, positional - dropna : bool - dropna from groupby - + Returns ------- grouper : Index Index of values to group on. - labels : ndarray of int or None - Array of locations in level_index. - uniques : Index or None - Index of unique values for level. """ - assert level is None or level == 0 if mapper is None: grouper = self else: grouper = self.map(mapper) - return grouper, None, None + return grouper # -------------------------------------------------------------------- # Introspection Methods diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 04a57c1709382..b3378fb9abacd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1457,32 +1457,6 @@ def _set_names(self, names, *, level=None, validate: bool = True): # -------------------------------------------------------------------- - @doc(Index._get_grouper_for_level) - def _get_grouper_for_level( - self, - mapper, - *, - level=None, - dropna: bool = True, - ) -> tuple[Index, npt.NDArray[np.signedinteger] | None, Index | None]: - if mapper is not None: - indexer = self.codes[level] - # Handle group mapping function and return - level_values = self.levels[level].take(indexer) - grouper = level_values.map(mapper) - return grouper, None, None - - values = self.get_level_values(level) - codes, uniques = algos.factorize(values, sort=True, use_na_sentinel=dropna) - assert isinstance(uniques, Index) - - if self.levels[level]._can_hold_na: - grouper = uniques.take(codes, fill_value=True) - else: - grouper = uniques.take(codes) - - return grouper, codes, uniques - @cache_readonly def inferred_type(self) -> str: return "mixed" From 314a5e8a934eb939008ef66e1164f639e07e52f4 Mon Sep 17 00:00:00 2001 From: codamuse Date: Thu, 24 Nov 2022 23:37:13 +0100 Subject: [PATCH 2/6] cln: fix linting errors --- pandas/core/indexes/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5f2e2c69e7615..90df0de9b5f24 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2065,10 +2065,7 @@ def _drop_level_numbers(self, levnums: list[int]): ) @final - def _get_grouper_for_level( - self, - mapper - ) -> Index: + def _get_grouper_for_level(self, mapper) -> Index: """ Get index grouper from a mapping function @@ -2076,7 +2073,7 @@ def _get_grouper_for_level( ---------- mapper: Group mapping function or None Function mapping index values to groups - + Returns ------- grouper : Index From f6916a0b79050501e5fa5a000d696cadcac01037 Mon Sep 17 00:00:00 2001 From: codamuse Date: Fri, 25 Nov 2022 00:25:01 +0100 Subject: [PATCH 3/6] raise NotImplementedError if Multi --- pandas/core/indexes/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 90df0de9b5f24..795f8ff25b241 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2079,6 +2079,9 @@ def _get_grouper_for_level(self, mapper) -> Index: grouper : Index Index of values to group on. """ + if self._multi: + raise NotImplementedError + if mapper is None: grouper = self else: From d6283ad3237a7be1ad7646d93acf1600ef701408 Mon Sep 17 00:00:00 2001 From: codamuse Date: Mon, 28 Nov 2022 14:00:22 +0100 Subject: [PATCH 4/6] fix: _multi -> _is_multi --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 795f8ff25b241..b46921e796a40 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2079,9 +2079,9 @@ def _get_grouper_for_level(self, mapper) -> Index: grouper : Index Index of values to group on. """ - if self._multi: + if self._is_multi: raise NotImplementedError - + if mapper is None: grouper = self else: From 3352f4a8f5f55169b38866f225041b218b9cd5a5 Mon Sep 17 00:00:00 2001 From: codamuse Date: Tue, 29 Nov 2022 14:02:07 +0100 Subject: [PATCH 5/6] cln: remove _get_grouper_for_level() entirely --- pandas/core/groupby/grouper.py | 8 ++++++-- pandas/core/indexes/base.py | 25 ------------------------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 3ed198f9278cb..892dea4e2ea9e 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -472,7 +472,6 @@ def __init__( ilevel = self._ilevel if ilevel is not None: - mapper = self.grouping_vector # In extant tests, the new self.grouping_vector matches # `index.get_level_values(ilevel)` whenever # mapper is None and isinstance(index, MultiIndex) @@ -480,7 +479,12 @@ def __init__( index_level = index.get_level_values(ilevel) else: index_level = index - self.grouping_vector = index_level._get_grouper_for_level(mapper) + + if self.grouping_vector is None: + self.grouping_vector = index_level + else: + mapper = self.grouping_vector + self.grouping_vector = index_level.map(mapper) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b46921e796a40..cd21a03b8299f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2064,31 +2064,6 @@ def _drop_level_numbers(self, levnums: list[int]): verify_integrity=False, ) - @final - def _get_grouper_for_level(self, mapper) -> Index: - """ - Get index grouper from a mapping function - - Parameters - ---------- - mapper: Group mapping function or None - Function mapping index values to groups - - Returns - ------- - grouper : Index - Index of values to group on. - """ - if self._is_multi: - raise NotImplementedError - - if mapper is None: - grouper = self - else: - grouper = self.map(mapper) - - return grouper - # -------------------------------------------------------------------- # Introspection Methods From 0dd9ce5138f7f78d8a1563ee1ee767eb253dcf51 Mon Sep 17 00:00:00 2001 From: codamuse Date: Tue, 29 Nov 2022 14:33:58 +0100 Subject: [PATCH 6/6] style: whitespace linter --- pandas/core/groupby/grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 892dea4e2ea9e..9e756e7006bd4 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -479,7 +479,7 @@ def __init__( index_level = index.get_level_values(ilevel) else: index_level = index - + if self.grouping_vector is None: self.grouping_vector = index_level else: