Skip to content

Expand groupby dispatch whitelist (GH5480) #5604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 7, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,30 @@
# Names that GroupBy objects are allowed to forward to the wrapped NDFrame.
# Plotting entry points are kept separately so they can be merged into (or
# removed from) the per-type whitelists below.
_plotting_methods = frozenset(('plot', 'boxplot', 'hist'))

# NOTE(review): this text comes from a diff view; `_apply_whitelist` looks
# like the pre-change, single shared whitelist that the three whitelists
# below replace -- confirm against the merged file before relying on it.
_apply_whitelist = _plotting_methods.union((
    'last', 'first',
    'mean', 'sum', 'min', 'max',
    'cumsum', 'cumprod', 'cummin', 'cummax',
    'resample',
    'describe',
    'rank', 'quantile', 'count',
    'fillna', 'dtype',
))

# Attribute names whitelisted for both the Series and the DataFrame variants
# (each variant below is derived from this set).
_common_apply_whitelist = _plotting_methods.union((
    # selection
    'last', 'first', 'head', 'tail', 'irow', 'take',
    # reductions / statistics
    'median', 'mean', 'sum', 'min', 'max',
    'mad', 'skew', 'any', 'all',
    'rank', 'quantile', 'count', 'describe',
    'corr', 'cov',
    # cumulative operations
    'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
    # reshaping in time / filling
    'resample', 'shift', 'tshift', 'pct_change',
    'fillna', 'ffill', 'bfill',
))

# Series variant: the common set without 'boxplot', plus 'dtype' and
# 'value_counts'.
_series_apply_whitelist = (
    _common_apply_whitelist
    .difference(('boxplot',))
    .union(('dtype', 'value_counts'))
)

# DataFrame variant: the common set plus 'dtypes' and 'corrwith'.
_dataframe_apply_whitelist = _common_apply_whitelist.union(
    ('dtypes', 'corrwith'))


class GroupByError(Exception):
Expand Down Expand Up @@ -185,6 +202,7 @@ class GroupBy(PandasObject):
len(grouped) : int
Number of groups
"""
_apply_whitelist = _common_apply_whitelist

def __init__(self, obj, keys=None, axis=0, level=None,
grouper=None, exclusions=None, selection=None, as_index=True,
Expand Down Expand Up @@ -252,7 +270,7 @@ def _selection_list(self):
return self._selection

def _local_dir(self):
# Return the sorted, de-duplicated combination of the names reported by the
# wrapped object's own `_local_dir()` and the names in the apply whitelist.
# NOTE(review): two `return` lines appear because this text comes from a diff
# view; presumably the first (module-level `_apply_whitelist`) is the removed
# line and the second (`self._apply_whitelist`) is its replacement -- confirm
# against the merged file.
return sorted(set(self.obj._local_dir() + list(_apply_whitelist)))
return sorted(set(self.obj._local_dir() + list(self._apply_whitelist)))

def __getattr__(self, attr):
if attr in self.obj:
Expand All @@ -268,7 +286,7 @@ def __getitem__(self, key):
raise NotImplementedError

def _make_wrapper(self, name):
if name not in _apply_whitelist:
if name not in self._apply_whitelist:
is_callable = callable(getattr(self.obj, name, None))
kind = ' callable ' if is_callable else ' '
msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
Expand Down Expand Up @@ -1605,6 +1623,7 @@ def _convert_grouper(axis, grouper):


class SeriesGroupBy(GroupBy):
_apply_whitelist = _series_apply_whitelist

def aggregate(self, func_or_funcs, *args, **kwargs):
"""
Expand Down Expand Up @@ -2401,6 +2420,7 @@ def add_indices():


class DataFrameGroupBy(NDFrameGroupBy):
_apply_whitelist = _dataframe_apply_whitelist

_block_agg_axis = 1

Expand Down
73 changes: 67 additions & 6 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3221,10 +3221,67 @@ def test_groupby_whitelist(self):
'letters': Series(random_letters)})
s = df.floats

blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where',
'mask', 'align', 'groupby', 'clip', 'astype',
'at', 'combine', 'consolidate', 'convert_objects',
'corr', 'corr_with', 'cov']
df_whitelist = frozenset([
'last', 'first',
'mean', 'sum', 'min', 'max',
'head', 'tail',
'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
'resample',
'describe',
'rank', 'quantile', 'count',
'fillna',
'mad',
'any', 'all',
'irow', 'take',
'shift', 'tshift',
'ffill', 'bfill',
'pct_change', 'skew',
'plot', 'boxplot', 'hist',
'median', 'dtypes',
'corrwith', 'corr', 'cov',
])
s_whitelist = frozenset([
'last', 'first',
'mean', 'sum', 'min', 'max',
'head', 'tail',
'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
'resample',
'describe',
'rank', 'quantile', 'count',
'fillna',
'mad',
'any', 'all',
'irow', 'take',
'shift', 'tshift',
'ffill', 'bfill',
'pct_change', 'skew',
'plot', 'hist',
'median', 'dtype',
'corr', 'cov',
'value_counts',
])

for obj, whitelist in zip((df, s),
(df_whitelist, s_whitelist)):
gb = obj.groupby(df.letters)
self.assertEqual(whitelist, gb._apply_whitelist)
for m in whitelist:
getattr(gb, m)

def test_groupby_blacklist(self):
from string import ascii_lowercase
letters = np.array(list(ascii_lowercase))
N = 10
random_letters = letters.take(np.random.randint(0, 26, N))
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
'letters': Series(random_letters)})
s = df.floats

blacklist = [
'eval', 'query', 'abs', 'where',
'mask', 'align', 'groupby', 'clip', 'astype',
'at', 'combine', 'consolidate', 'convert_objects',
]
to_methods = [method for method in dir(df) if method.startswith('to_')]

blacklist.extend(to_methods)
Expand Down Expand Up @@ -3319,8 +3376,12 @@ def test_tab_completion(self):
'groups','hist','indices','last','max','mean','median',
'min','name','ngroups','nth','ohlc','plot', 'prod',
'size','std','sum','transform','var', 'count', 'head', 'describe',
'cummax', 'dtype', 'quantile', 'rank', 'cumprod', 'tail',
'resample', 'cummin', 'fillna', 'cumsum', 'cumcount'])
'cummax', 'quantile', 'rank', 'cumprod', 'tail',
'resample', 'cummin', 'fillna', 'cumsum', 'cumcount',
'all', 'shift', 'skew', 'bfill', 'irow', 'ffill',
'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
'cov', 'dtypes',
])
self.assertEqual(results, expected)

def assert_fp_equal(a, b):
Expand Down