From b5a07bc01eaf86944757d0a339b57f7c9c94fc66 Mon Sep 17 00:00:00 2001
From: Garrett Drapala <drapala@gmail.com>
Date: Sun, 17 Nov 2013 15:23:58 -0500
Subject: [PATCH] CLN: expand groupby dispatch whitelist (GH5480)

- Create separate whitelists for SeriesGroupBy and DataFrameGroupBy objects
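- Improve groupby whitelist testing: assert the exact whitelist for each
  groupby type and move the blacklist checks into test_groupby_blacklist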
---
 pandas/core/groupby.py       | 38 ++++++++++++++-----
 pandas/tests/test_groupby.py | 73 +++++++++++++++++++++++++++++++++---
 2 files changed, 96 insertions(+), 15 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index c635baa0e2739..7a7fe32963457 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -50,13 +50,30 @@
 # forwarding methods from NDFrames
 _plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
 
-_apply_whitelist = frozenset(['last', 'first',
-                              'mean', 'sum', 'min', 'max',
-                              'cumsum', 'cumprod', 'cummin', 'cummax',
-                              'resample',
-                              'describe',
-                              'rank', 'quantile', 'count',
-                              'fillna', 'dtype']) | _plotting_methods
+_common_apply_whitelist = frozenset([
+    'last', 'first',
+    'head', 'tail', 'median',
+    'mean', 'sum', 'min', 'max',
+    'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
+    'resample',
+    'describe',
+    'rank', 'quantile', 'count',
+    'fillna',
+    'mad',
+    'any', 'all',
+    'irow', 'take',
+    'shift', 'tshift',
+    'ffill', 'bfill',
+    'pct_change', 'skew',
+    'corr', 'cov',
+]) | _plotting_methods
+
+_series_apply_whitelist = \
+    (_common_apply_whitelist - set(['boxplot'])) | \
+    frozenset(['dtype', 'value_counts'])
+
+_dataframe_apply_whitelist = \
+    _common_apply_whitelist | frozenset(['dtypes', 'corrwith'])
 
 
 class GroupByError(Exception):
@@ -185,6 +202,7 @@ class GroupBy(PandasObject):
     len(grouped) : int
         Number of groups
     """
+    _apply_whitelist = _common_apply_whitelist
 
     def __init__(self, obj, keys=None, axis=0, level=None,
                  grouper=None, exclusions=None, selection=None, as_index=True,
@@ -252,7 +270,7 @@ def _selection_list(self):
         return self._selection
 
     def _local_dir(self):
-        return sorted(set(self.obj._local_dir() + list(_apply_whitelist)))
+        return sorted(set(self.obj._local_dir() + list(self._apply_whitelist)))
 
     def __getattr__(self, attr):
         if attr in self.obj:
@@ -268,7 +286,7 @@ def __getitem__(self, key):
         raise NotImplementedError
 
     def _make_wrapper(self, name):
-        if name not in _apply_whitelist:
+        if name not in self._apply_whitelist:
             is_callable = callable(getattr(self.obj, name, None))
             kind = ' callable ' if is_callable else ' '
             msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
@@ -1605,6 +1623,7 @@ def _convert_grouper(axis, grouper):
 
 
 class SeriesGroupBy(GroupBy):
+    _apply_whitelist = _series_apply_whitelist
 
     def aggregate(self, func_or_funcs, *args, **kwargs):
         """
@@ -2401,6 +2420,7 @@ def add_indices():
 
 
 class DataFrameGroupBy(NDFrameGroupBy):
+    _apply_whitelist = _dataframe_apply_whitelist
 
     _block_agg_axis = 1
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 76fee1702d64a..6802b57bc39d1 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3221,10 +3221,67 @@ def test_groupby_whitelist(self):
                         'letters': Series(random_letters)})
         s = df.floats
 
-        blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where',
-                     'mask', 'align', 'groupby', 'clip', 'astype',
-                     'at', 'combine', 'consolidate', 'convert_objects',
-                     'corr', 'corr_with', 'cov']
+        df_whitelist = frozenset([
+            'last', 'first',
+            'mean', 'sum', 'min', 'max',
+            'head', 'tail',
+            'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
+            'resample',
+            'describe',
+            'rank', 'quantile', 'count',
+            'fillna',
+            'mad',
+            'any', 'all',
+            'irow', 'take',
+            'shift', 'tshift',
+            'ffill', 'bfill',
+            'pct_change', 'skew',
+            'plot', 'boxplot', 'hist',
+            'median', 'dtypes',
+            'corrwith', 'corr', 'cov',
+        ])
+        s_whitelist = frozenset([
+            'last', 'first',
+            'mean', 'sum', 'min', 'max',
+            'head', 'tail',
+            'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount',
+            'resample',
+            'describe',
+            'rank', 'quantile', 'count',
+            'fillna',
+            'mad',
+            'any', 'all',
+            'irow', 'take',
+            'shift', 'tshift',
+            'ffill', 'bfill',
+            'pct_change', 'skew',
+            'plot', 'hist',
+            'median', 'dtype',
+            'corr', 'cov',
+            'value_counts',
+        ])
+
+        for obj, whitelist in zip((df, s),
+                                  (df_whitelist, s_whitelist)):
+            gb = obj.groupby(df.letters)
+            self.assertEqual(whitelist, gb._apply_whitelist)
+            for m in whitelist:
+                getattr(gb, m)
+
+    def test_groupby_blacklist(self):
+        from string import ascii_lowercase
+        letters = np.array(list(ascii_lowercase))
+        N = 10
+        random_letters = letters.take(np.random.randint(0, 26, N))
+        df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
+                        'letters': Series(random_letters)})
+        s = df.floats
+
+        blacklist = [
+            'eval', 'query', 'abs', 'where',
+            'mask', 'align', 'groupby', 'clip', 'astype',
+            'at', 'combine', 'consolidate', 'convert_objects',
+        ]
         to_methods = [method for method in dir(df) if method.startswith('to_')]
 
         blacklist.extend(to_methods)
@@ -3319,8 +3376,12 @@ def test_tab_completion(self):
             'groups','hist','indices','last','max','mean','median',
             'min','name','ngroups','nth','ohlc','plot', 'prod',
             'size','std','sum','transform','var', 'count', 'head', 'describe',
-            'cummax', 'dtype', 'quantile', 'rank', 'cumprod', 'tail',
-            'resample', 'cummin', 'fillna', 'cumsum', 'cumcount'])
+            'cummax', 'quantile', 'rank', 'cumprod', 'tail',
+            'resample', 'cummin', 'fillna', 'cumsum', 'cumcount',
+            'all', 'shift', 'skew', 'bfill', 'irow', 'ffill',
+            'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith',
+            'cov', 'dtypes',
+        ])
         self.assertEqual(results, expected)
 
 def assert_fp_equal(a, b):