From b5a07bc01eaf86944757d0a339b57f7c9c94fc66 Mon Sep 17 00:00:00 2001 From: Garrett Drapala Date: Sun, 17 Nov 2013 15:23:58 -0500 Subject: [PATCH] CLN: expand groupby dispatch whitelist (GH5480) - Create separate whitelists for SeriesGroupBy and DataFrameGroupBy objects - Improve groupby whitelist testing --- pandas/core/groupby.py | 38 ++++++++++++++----- pandas/tests/test_groupby.py | 73 +++++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c635baa0e2739..7a7fe32963457 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -50,13 +50,30 @@ # forwarding methods from NDFrames _plotting_methods = frozenset(['plot', 'boxplot', 'hist']) -_apply_whitelist = frozenset(['last', 'first', - 'mean', 'sum', 'min', 'max', - 'cumsum', 'cumprod', 'cummin', 'cummax', - 'resample', - 'describe', - 'rank', 'quantile', 'count', - 'fillna', 'dtype']) | _plotting_methods +_common_apply_whitelist = frozenset([ + 'last', 'first', + 'head', 'tail', 'median', + 'mean', 'sum', 'min', 'max', + 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', + 'resample', + 'describe', + 'rank', 'quantile', 'count', + 'fillna', + 'mad', + 'any', 'all', + 'irow', 'take', + 'shift', 'tshift', + 'ffill', 'bfill', + 'pct_change', 'skew', + 'corr', 'cov', +]) | _plotting_methods + +_series_apply_whitelist = \ + (_common_apply_whitelist - set(['boxplot'])) | \ + frozenset(['dtype', 'value_counts']) + +_dataframe_apply_whitelist = \ + _common_apply_whitelist | frozenset(['dtypes', 'corrwith']) class GroupByError(Exception): @@ -185,6 +202,7 @@ class GroupBy(PandasObject): len(grouped) : int Number of groups """ + _apply_whitelist = _common_apply_whitelist def __init__(self, obj, keys=None, axis=0, level=None, grouper=None, exclusions=None, selection=None, as_index=True, @@ -252,7 +270,7 @@ def _selection_list(self): return self._selection def _local_dir(self): - return sorted(set(self.obj._local_dir() + list(_apply_whitelist))) + return sorted(set(self.obj._local_dir() + list(self._apply_whitelist))) def __getattr__(self, attr): if attr in self.obj: @@ -268,7 +286,7 @@ def __getitem__(self, key): raise NotImplementedError def _make_wrapper(self, name): - if name not in _apply_whitelist: + if name not in self._apply_whitelist: is_callable = callable(getattr(self.obj, name, None)) kind = ' callable ' if is_callable else ' ' msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try " @@ -1605,6 +1623,7 @@ def _convert_grouper(axis, grouper): class SeriesGroupBy(GroupBy): + _apply_whitelist = _series_apply_whitelist def aggregate(self, func_or_funcs, *args, **kwargs): """ @@ -2401,6 +2420,7 @@ def add_indices(): class DataFrameGroupBy(NDFrameGroupBy): + _apply_whitelist = _dataframe_apply_whitelist _block_agg_axis = 1 diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 76fee1702d64a..6802b57bc39d1 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -3221,10 +3221,67 @@ def test_groupby_whitelist(self): 'letters': Series(random_letters)}) s = df.floats - blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where', - 'mask', 'align', 'groupby', 'clip', 'astype', - 'at', 'combine', 'consolidate', 'convert_objects', - 'corr', 'corr_with', 'cov'] + df_whitelist = frozenset([ + 'last', 'first', + 'mean', 'sum', 'min', 'max', + 'head', 'tail', + 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', + 'resample', + 'describe', + 'rank', 'quantile', 'count', + 'fillna', + 'mad', + 'any', 'all', + 'irow', 'take', + 'shift', 'tshift', + 'ffill', 'bfill', + 'pct_change', 'skew', + 'plot', 'boxplot', 'hist', + 'median', 'dtypes', + 'corrwith', 'corr', 'cov', + ]) + s_whitelist = frozenset([ + 'last', 'first', + 'mean', 'sum', 'min', 'max', + 'head', 'tail', + 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', + 'resample', + 'describe', + 'rank', 'quantile', 'count', + 'fillna', + 'mad', + 'any', 'all', + 'irow', 'take', + 'shift', 'tshift', + 'ffill', 'bfill', + 'pct_change', 'skew', + 'plot', 'hist', + 'median', 'dtype', + 'corr', 'cov', + 'value_counts', + ]) + + for obj, whitelist in zip((df, s), + (df_whitelist, s_whitelist)): + gb = obj.groupby(df.letters) + self.assertEqual(whitelist, gb._apply_whitelist) + for m in whitelist: + getattr(gb, m) + + def test_groupby_blacklist(self): + from string import ascii_lowercase + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + s = df.floats + + blacklist = [ + 'eval', 'query', 'abs', 'where', + 'mask', 'align', 'groupby', 'clip', 'astype', + 'at', 'combine', 'consolidate', 'convert_objects', + ] to_methods = [method for method in dir(df) if method.startswith('to_')] blacklist.extend(to_methods) @@ -3319,8 +3376,12 @@ def test_tab_completion(self): 'groups','hist','indices','last','max','mean','median', 'min','name','ngroups','nth','ohlc','plot', 'prod', 'size','std','sum','transform','var', 'count', 'head', 'describe', - 'cummax', 'dtype', 'quantile', 'rank', 'cumprod', 'tail', - 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount']) + 'cummax', 'quantile', 'rank', 'cumprod', 'tail', + 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', + 'all', 'shift', 'skew', 'bfill', 'irow', 'ffill', + 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', + 'cov', 'dtypes', + ]) self.assertEqual(results, expected) def assert_fp_equal(a, b):