Skip to content

Commit 36ba8a6

Browse files
committed
CLN: reorg groupby to multiple modules
1 parent 486bfe8 commit 36ba8a6

18 files changed

+2935
-2807
lines changed

pandas/core/api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pandas.core.algorithms import factorize, unique, value_counts
88
from pandas.core.dtypes.missing import isna, isnull, notna, notnull
99
from pandas.core.arrays import Categorical
10-
from pandas.core.groupby.groupby import Grouper
10+
from pandas.core.groupby import Grouper
1111
from pandas.io.formats.format import set_eng_float_format
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,

pandas/core/base.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -648,53 +648,6 @@ def _is_builtin_func(self, arg):
648648
return self._builtin_table.get(arg, arg)
649649

650650

651-
class GroupByMixin(object):
    """
    Provide the groupby facilities to the mixed object.

    The methods below read ``self._groupby``, ``self.obj`` and
    ``self._attributes`` and call ``self._shallow_copy`` and
    ``self._reset_cache``; the mixing class has to supply all of them.
    """

    @staticmethod
    def _dispatch(name, *args, **kwargs):
        """
        Build a method that dispatches ``name`` to the groups via apply.

        Parameters
        ----------
        name : str
            name of the method looked up on the per-group object; also
            assigned as ``__name__`` of the returned function
        *args, **kwargs
            accepted but not used here; the arguments forwarded to
            ``name`` are the ones given when the returned function is
            called

        Returns
        -------
        function
            ``outer(self, *args, **kwargs)``, meant to be assigned as a
            method on the mixing class
        """

        def outer(self, *args, **kwargs):
            def f(x):
                # shallow-copy ourselves around the piece handed over by
                # apply, keeping the same groupby
                x = self._shallow_copy(x, groupby=self._groupby)
                return getattr(x, name)(*args, **kwargs)
            return self._groupby.apply(f)
        outer.__name__ = name
        return outer

    def _gotitem(self, key, ndim, subset=None):
        """
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not consulted by this implementation)
        subset : object, default None
            subset to act on; falls back to ``self.obj`` when None

        Returns
        -------
        a new instance of ``self.__class__`` built on ``subset``
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        kwargs = {attr: getattr(self, attr) for attr in self._attributes}
        result = self.__class__(subset,
                                groupby=self._groupby[key],
                                parent=self,
                                **kwargs)
        result._reset_cache()
        if subset.ndim == 2:
            # remember the selection for a scalar key that is contained in
            # subset, or for any list-like key
            if (is_scalar(key) and key in subset) or is_list_like(key):
                result._selection = key
        return result
696-
697-
698651
class IndexOpsMixin(object):
699652
""" common ops mixin to support a unified interface / docs for Series /
700653
Index

pandas/core/groupby/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# flake8: noqa
2-
from pandas.core.groupby.groupby import (
3-
Grouper, GroupBy, SeriesGroupBy, DataFrameGroupBy
4-
)
2+
from pandas.core.groupby.groupby import GroupBy
3+
from pandas.core.groupby.series import SeriesGroupBy
4+
from pandas.core.groupby.frame import DataFrameGroupBy
5+
from pandas.core.groupby.ops import Grouper

pandas/core/groupby/base.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""
2+
base components for groupby
3+
"""
4+
5+
import types
6+
from pandas.core.dtypes.common import is_scalar, is_list_like
7+
from pandas.util._decorators import make_signature
8+
9+
10+
class GroupByMixin(object):
    """
    Provide the groupby facilities to the mixed object.

    The methods below read ``self._groupby``, ``self.obj`` and
    ``self._attributes`` and call ``self._shallow_copy`` and
    ``self._reset_cache``; the mixing class has to supply all of them.
    """

    @staticmethod
    def _dispatch(name, *args, **kwargs):
        """
        Build a method that dispatches ``name`` to the groups via apply.

        Parameters
        ----------
        name : str
            name of the method looked up on the per-group object; also
            assigned as ``__name__`` of the returned function
        *args, **kwargs
            accepted but not used here; the arguments forwarded to
            ``name`` are the ones given when the returned function is
            called

        Returns
        -------
        function
            ``outer(self, *args, **kwargs)``, meant to be assigned as a
            method on the mixing class
        """

        def outer(self, *args, **kwargs):
            def f(x):
                # shallow-copy ourselves around the piece handed over by
                # apply, keeping the same groupby
                x = self._shallow_copy(x, groupby=self._groupby)
                return getattr(x, name)(*args, **kwargs)
            return self._groupby.apply(f)
        outer.__name__ = name
        return outer

    def _gotitem(self, key, ndim, subset=None):
        """
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not consulted by this implementation)
        subset : object, default None
            subset to act on; falls back to ``self.obj`` when None

        Returns
        -------
        a new instance of ``self.__class__`` built on ``subset``
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        kwargs = {attr: getattr(self, attr) for attr in self._attributes}
        result = self.__class__(subset,
                                groupby=self._groupby[key],
                                parent=self,
                                **kwargs)
        result._reset_cache()
        if subset.ndim == 2:
            # remember the selection for a scalar key that is contained in
            # subset, or for any list-like key
            if (is_scalar(key) and key in subset) or is_list_like(key):
                result._selection = key
        return result
55+
56+
57+
# Plotting methods are kept in their own set: they are special-cased to
# prevent duplicate plots when catching exceptions while forwarding
# methods from NDFrames.
plotting_methods = frozenset(['plot', 'boxplot', 'hist'])

# Names shared by the Series and DataFrame whitelists.
common_apply_whitelist = plotting_methods | frozenset([
    'last', 'first',
    'head', 'tail', 'median',
    'mean', 'sum', 'min', 'max',
    'cumcount', 'ngroup',
    'resample',
    'rank', 'quantile',
    'fillna',
    'mad',
    'any', 'all',
    'take',
    'idxmax', 'idxmin',
    'shift', 'tshift',
    'ffill', 'bfill',
    'pct_change', 'skew',
    'corr', 'cov', 'diff',
])

# Both derived whitelists drop 'boxplot' from the common names and then add
# their own extras.
series_apply_whitelist = (
    (common_apply_whitelist - {'boxplot'}) |
    frozenset(['nlargest', 'nsmallest',
               'is_monotonic_increasing',
               'is_monotonic_decreasing',
               'dtype', 'unique'])
)

dataframe_apply_whitelist = (
    (common_apply_whitelist - {'boxplot'}) |
    frozenset(['dtypes', 'corrwith'])
)

cython_transforms = frozenset([
    'cumprod', 'cumsum', 'shift', 'cummin', 'cummax',
])

cython_cast_blacklist = frozenset(['rank', 'count', 'size'])
93+
94+
95+
def whitelist_method_generator(base, klass, whitelist):
    """
    Yields all GroupBy member defs for DataFrame/Series names in whitelist.

    Parameters
    ----------
    base : class
        base class
    klass : class
        class where members are defined.
        Should be Series or DataFrame
    whitelist : iterable of str
        names of klass members to be constructed

    Returns
    -------
    The generator yields a sequence of strings, each suitable for exec'ing,
    that define implementations of the named methods for DataFrameGroupBy
    or SeriesGroupBy.

    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    """

    # The generated source is exec'd, so every line after the ``def`` line
    # must carry the function-body indentation inside the string literal.
    method_wrapper_template = \
        """def %(name)s(%(sig)s) :
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self) :
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(base, name):
            continue
        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__
        # members without a plain-str docstring get an empty one
        doc = doc if isinstance(doc, str) else ''
        # NOTE(review): on Python 3, getattr on a class returns a plain
        # function rather than a types.MethodType, so regular methods take
        # the property branch below -- confirm this is intended.
        if isinstance(f, types.MethodType):
            wrapper_template = method_wrapper_template
            decl, args = make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
        else:
            wrapper_template = property_wrapper_template
            params = {'name': name, 'doc': doc}
        yield wrapper_template % params

0 commit comments

Comments
 (0)