From c58123c8e6e5b5596ceea2029525677cacda414e Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 12 Feb 2018 14:23:31 -0800
Subject: [PATCH 01/25] Added test case for groupby fill methods

---
 pandas/tests/groupby/test_groupby.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 129ac6b06205c..d3b8d38688682 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2061,6 +2061,25 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
                                    ascending=ascending,
                                    na_option=na_option, pct=pct)
 
+    @pytest.mark.parametrize("fill_method,limit,exp_vals", [
+        ("ffill", None,
+         [np.nan, np.nan, 'foo', 'foo', 'foo', 'bar', 'bar', 'bar']),
+        ("ffill", 1,
+         [np.nan, np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan]),
+        ("bfill", None,
+         ['foo', 'foo', 'foo', 'bar', 'bar', 'bar', np.nan, np.nan]),
+        ("bfill", 1,
+         [np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan, np.nan])
+    ])
+    def test_group_fill_methods(self, fill_method, limit, exp_vals):
+        vals = [np.nan, np.nan, 'foo', np.nan, np.nan, 'bar', np.nan, np.nan]
+        keys = ['a'] * len(vals) + ['b'] * len(vals)
+        df = DataFrame({'key': keys, 'val': vals * 2})
+        result = getattr(df.groupby('key'), fill_method)(limit=limit)
+
+        exp = DataFrame({'key': keys, 'val': exp_vals * 2})
+        assert_frame_equal(result, exp)
+
     def test_dont_clobber_name_column(self):
         df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],
                         'name': ['foo', 'bar', 'baz'] * 2})

From 2bc80239b92fa1d4fb23da8b0496739e819045c1 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 12 Feb 2018 16:07:37 -0800
Subject: [PATCH 02/25] Added code for group_fillna

---
 pandas/_libs/groupby_helper.pxi.in | 71 +++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index e03e3af65755b..31f30377ee26a 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -273,7 +273,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 {{endfor}}
 
 #----------------------------------------------------------------------
-# group_nth, group_last, group_rank
+# group_nth, group_last, group_rank, group_fillna
 #----------------------------------------------------------------------
 
 {{py:
@@ -574,6 +574,75 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
             for i in range(N):
                 out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_fillna_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
+                          ndarray[{{c_type}}, ndim=2] values,
+                          ndarray[int64_t] labels,
+                          object method,
+                          int64_t limit):
+    """Fills values forwards or backwards within a group
+
+    Parameters
+    ----------
+    out : array of {{dest_type2}} values which this method will write its
+        results to
+    values : array of {{c_type}} values which may require filling
+    labels : array containing unique label for each group, with its ordering
+        matching up to the corresponding record in `values`
+    method : {'ffill', 'bfill'}
+        Direction for fill to be applied (forwards or backwards, respectively)
+    limit : Consecutive values to fill before stopping, or -1 for no limit
+
+    Notes
+    -----
+    This method modifies the `out` parameter rather than returning an object
+    """
+    cdef:
+        Py_ssize_t i, N
+        ndarray[uint8_t] mask
+        ndarray[int64_t] sorted_labels
+        {{dest_type2}} curr_fill_val = {{nan_val}}
+        int64_t idx, filled_vals=0
+
+    N, K = (<object> values).shape
+
+    {{if name=='int64'}}
+    mask = (values[:, 0] == {{nan_val}}).astype(np.uint8)
+    {{elif name=='object'}}
+    mask = np.array([x != x for x in values[:, 0]]).astype(np.uint8)
+    {{else}}
+    mask = np.isnan(values[:, 0]).astype(np.uint8)
+    {{endif}}
+
+    sorted_labels = np.argsort(labels)
+    if method == 'bfill':
+        sorted_labels[::-1].sort()
+
+    {{if name == 'object'}}
+    if True:  # make templating happy
+    {{else}}
+    with nogil:
+    {{endif}}
+        for i in range(N):
+            idx = sorted_labels[i]
+            if mask[idx]:  # is missing
+                if limit == -1 or filled_vals < limit:
+                    out[idx, 0] = curr_fill_val
+                else:
+                    out[idx, 0] == {{nan_val}}
+                filled_vals += 1
+            else:  # reset items when not missing
+                filled_vals = 0
+                curr_fill_val = values[idx, 0]
+                out[idx, 0] = values[idx, 0]
+
+            # If we move to the next group, reset
+            # the fill_val and counter
+            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
+                curr_fill_val = {{nan_val}}
+                filled_vals = 0
 {{endfor}}
 
 

From 3cb25c014a5031e0dcb3cfcaf755b3ebf2562d54 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 12 Feb 2018 22:46:59 -0800
Subject: [PATCH 03/25] Added ASV benchmarks

---
 asv_bench/benchmarks/groupby.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 61db39528a5fb..c347442784d41 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -370,11 +370,11 @@ class GroupByMethods(object):
 
     param_names = ['dtype', 'method']
     params = [['int', 'float'],
-              ['all', 'any', 'count', 'cumcount', 'cummax', 'cummin',
-               'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad',
-               'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod',
-               'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail',
-               'unique', 'value_counts', 'var']]
+              ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
+               'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
+               'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
+               'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
+               'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
 
     def setup(self, dtype, method):
         ngroups = 1000

From 7fecc1165791ef30f173da47ebe4581f73b78dd0 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 12 Feb 2018 22:49:00 -0800
Subject: [PATCH 04/25] Connected GroupBy method to Cython fillna

---
 pandas/core/groupby.py | 113 ++++++++++++++++++++++++++++++-----------
 1 file changed, 82 insertions(+), 31 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index b1615f720368d..204a0f55e29a0 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -877,21 +877,28 @@ def apply(self, func, *args, **kwargs):
 
         func = self._is_builtin_func(func)
 
-        # this is needed so we don't try and wrap strings. If we could
-        # resolve functions to their callable functions prior, this
-        # wouldn't be needed
-        if args or kwargs:
-            if callable(func):
-
-                @wraps(func)
-                def f(g):
-                    with np.errstate(all='ignore'):
-                        return func(g, *args, **kwargs)
+        # Try to go down the Cython path first
+        try:
+            f = self.grouper._cython_functions['apply'][func]
+            return self.grouper._cython_apply(f, self._selected_obj, self.axis,
+                                              **kwargs)
+        except KeyError:
+            # this is needed so we don't try and wrap strings. If we could
+            # resolve functions to their callable functions prior, this
+            # wouldn't be needed
+            if args or kwargs:
+                if callable(func):
+
+                    @wraps(func)
+                    def f(g):
+                        with np.errstate(all='ignore'):
+                            return func(g, *args, **kwargs)
+                else:
+                    raise ValueError('func must be a callable if args or '
+                                     'kwargs are supplied and func is not '
+                                     'implemented in Cython')
             else:
-                raise ValueError('func must be a callable if args or '
-                                 'kwargs are supplied')
-        else:
-            f = func
+                f = func
 
         # ignore SettingWithCopy here in case the user mutates
         with option_context('mode.chained_assignment', None):
@@ -1474,7 +1481,7 @@ def pad(self, limit=None):
         Series.fillna
         DataFrame.fillna
         """
-        return self.apply(lambda x: x.ffill(limit=limit))
+        return self.apply('ffill', limit=limit)
     ffill = pad
 
     @Substitution(name='groupby')
@@ -1494,7 +1501,7 @@ def backfill(self, limit=None):
         Series.fillna
         DataFrame.fillna
         """
-        return self.apply(lambda x: x.bfill(limit=limit))
+        return self.apply('bfill', limit=limit)
     bfill = backfill
 
     @Substitution(name='groupby')
@@ -2034,6 +2041,32 @@ def _get_group_keys(self):
                                           self.levels,
                                           self.labels)
 
+    def _cython_apply(self, f, data, axis, **kwargs):
+        output = collections.OrderedDict()
+        for col in data.columns:
+            if col in self.names:
+                output[col] = data[col].values
+            else:
+                # duplicative of _get_cython_function; needs refactor
+                dtype_str = data[col].dtype.name
+                values = data[col].values[:, None]
+                func = afunc = self._get_func(f['name'], dtype_str)
+                f = f.get('f')
+
+                def wrapper(*args, **kwargs):
+                    return f(afunc, *args, **kwargs)
+
+                func = wrapper
+                labels, _, _ = self.group_info
+
+                result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
+                                     fill_value=np.nan)
+                func(result, values, labels, **kwargs)
+                output[col] = result[:, 0]
+
+        # Ugh
+        return DataFrame(output, index=data.index)
+
     def apply(self, f, data, axis=0):
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
@@ -2230,6 +2263,22 @@ def get_group_levels(self):
                     kwargs.get('na_option', 'keep')
                 )
             }
+        },
+        'apply': {
+            'ffill': {
+                'name': 'group_fillna',
+                'f': lambda func, a, b, c, **kwargs: func(
+                    a, b, c,
+                    'ffill', kwargs['limit'] if kwargs['limit'] else -1
+                )
+            },
+            'bfill': {
+                'name': 'group_fillna',
+                'f': lambda func, a, b, c, **kwargs: func(
+                    a, b, c,
+                    'bfill', kwargs['limit'] if kwargs['limit'] else -1
+                )
+            }
         }
     }
 
@@ -2248,27 +2297,28 @@ def _is_builtin_func(self, arg):
         """
         return SelectionMixin._builtin_table.get(arg, arg)
 
-    def _get_cython_function(self, kind, how, values, is_numeric):
-
-        dtype_str = values.dtype.name
+    def _get_func(self, fname, dtype_str=None, is_numeric=False):
+        # see if there is a fused-type version of function
+        # only valid for numeric
+        f = getattr(libgroupby, fname, None)
+        if f is not None and is_numeric:
+            return f
 
-        def get_func(fname):
-            # see if there is a fused-type version of function
-            # only valid for numeric
-            f = getattr(libgroupby, fname, None)
-            if f is not None and is_numeric:
+        # otherwise find dtype-specific version, falling back to object
+        for dt in [dtype_str, 'object']:
+            f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
+            if f is not None:
                 return f
 
-            # otherwise find dtype-specific version, falling back to object
-            for dt in [dtype_str, 'object']:
-                f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
-                if f is not None:
-                    return f
+    def _get_cython_function(self, kind, how, values, is_numeric):
+
+        dtype_str = values.dtype.name
 
         ftype = self._cython_functions[kind][how]
 
         if isinstance(ftype, dict):
-            func = afunc = get_func(ftype['name'])
+            func = afunc = self._get_func(ftype['name'], dtype_str=dtype_str,
+                                          is_numeric=is_numeric)
 
             # a sub-function
             f = ftype.get('f')
@@ -2281,7 +2331,8 @@ def wrapper(*args, **kwargs):
                 func = wrapper
 
         else:
-            func = get_func(ftype)
+            func = self._get_func(ftype, dtype_str=dtype_str,
+                                  is_numeric=is_numeric)
 
         if func is None:
             raise NotImplementedError("function is not implemented for this"

From 3c2fb366a861479644e441153124c1f7751cbd09 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 12 Feb 2018 23:26:17 -0800
Subject: [PATCH 05/25] Fixed issue when filling Series after GroupBy

---
 pandas/core/groupby.py               | 48 ++++++++++++++++------------
 pandas/tests/groupby/test_groupby.py | 15 ++++++---
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 204a0f55e29a0..e36cd78a02181 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2041,31 +2041,37 @@ def _get_group_keys(self):
                                           self.levels,
                                           self.labels)
 
-    def _cython_apply(self, f, data, axis, **kwargs):
-        output = collections.OrderedDict()
-        for col in data.columns:
-            if col in self.names:
-                output[col] = data[col].values
-            else:
-                # duplicative of _get_cython_function; needs refactor
-                dtype_str = data[col].dtype.name
-                values = data[col].values[:, None]
-                func = afunc = self._get_func(f['name'], dtype_str)
-                f = f.get('f')
+    def _cython_apply(self, ftype, data, axis, **kwargs):
+        def _generate_output(ser):
+            # duplicative of _get_cython_function; needs refactor
+            dtype_str = ser.dtype.name
+            values = ser.values[:, None]
+            func = afunc = self._get_func(ftype['name'], dtype_str)
+            f = ftype.get('f')
 
-                def wrapper(*args, **kwargs):
-                    return f(afunc, *args, **kwargs)
+            def wrapper(*args, **kwargs):
+                return f(afunc, *args, **kwargs)
 
-                func = wrapper
-                labels, _, _ = self.group_info
+            func = wrapper
+            labels, _, _ = self.group_info
+
+            result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
+                                 fill_value=np.nan)
+            func(result, values, labels, **kwargs)
 
-                result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
-                                     fill_value=np.nan)
-                func(result, values, labels, **kwargs)
-                output[col] = result[:, 0]
+            return result[:, 0]
 
-        # Ugh
-        return DataFrame(output, index=data.index)
+        # Using introspection to determine result; not ideal needs refactor
+        if type(data) is Series:
+            return Series(_generate_output(data), name=data.name)
+        else:
+            output = collections.OrderedDict()
+            for col in data.columns:
+                if col in self.names:
+                    output[col] = data[col].values
+                else:
+                    output[col] = _generate_output(data[col])
+            return DataFrame(output, index=data.index)
 
     def apply(self, f, data, axis=0):
         mutated = self.mutated
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d3b8d38688682..e26ef05b0a5db 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2061,6 +2061,7 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
                                    ascending=ascending,
                                    na_option=na_option, pct=pct)
 
+    @pytest.mark.parametrize("as_series", [True, False])
     @pytest.mark.parametrize("fill_method,limit,exp_vals", [
         ("ffill", None,
          [np.nan, np.nan, 'foo', 'foo', 'foo', 'bar', 'bar', 'bar']),
@@ -2071,14 +2072,20 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
         ("bfill", 1,
          [np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan, np.nan])
     ])
-    def test_group_fill_methods(self, fill_method, limit, exp_vals):
+    def test_group_fill_methods(self, as_series, fill_method, limit, exp_vals):
         vals = [np.nan, np.nan, 'foo', np.nan, np.nan, 'bar', np.nan, np.nan]
         keys = ['a'] * len(vals) + ['b'] * len(vals)
         df = DataFrame({'key': keys, 'val': vals * 2})
-        result = getattr(df.groupby('key'), fill_method)(limit=limit)
 
-        exp = DataFrame({'key': keys, 'val': exp_vals * 2})
-        assert_frame_equal(result, exp)
+        if as_series:
+            result = getattr(
+                df.groupby('key')['val'], fill_method)(limit=limit)
+            exp = Series(exp_vals * 2, name='val')
+            assert_series_equal(result, exp)
+        else:
+            result = getattr(df.groupby('key'), fill_method)(limit=limit)
+            exp = DataFrame({'key': keys, 'val': exp_vals * 2})
+            assert_frame_equal(result, exp)
 
     def test_dont_clobber_name_column(self):
         df = DataFrame({'key': ['a', 'a', 'a', 'b', 'b', 'b'],

From a52b8c4b9346a5d4a98b179f6bd252901c855e2e Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 13 Feb 2018 12:55:52 -0800
Subject: [PATCH 06/25] Added tests to mix group entries; fixed sort bug

---
 pandas/_libs/groupby_helper.pxi.in   |  2 +-
 pandas/tests/groupby/test_groupby.py | 51 ++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 31f30377ee26a..13dfaaee1b3e7 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -618,7 +618,7 @@ def group_fillna_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
     sorted_labels = np.argsort(labels)
     if method == 'bfill':
-        sorted_labels[::-1].sort()
+        sorted_labels = sorted_labels[::-1]
 
     {{if name == 'object'}}
     if True:  # make templating happy
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e26ef05b0a5db..2429e9975fc8e 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2061,30 +2061,59 @@ def test_rank_object_raises(self, ties_method, ascending, na_option,
                                    ascending=ascending,
                                    na_option=na_option, pct=pct)
 
+    @pytest.mark.parametrize("mix_groupings", [True, False])
     @pytest.mark.parametrize("as_series", [True, False])
+    @pytest.mark.parametrize("val1,val2", [
+        ('foo', 'bar'), (1, 2), (1., 2.)])
     @pytest.mark.parametrize("fill_method,limit,exp_vals", [
         ("ffill", None,
-         [np.nan, np.nan, 'foo', 'foo', 'foo', 'bar', 'bar', 'bar']),
+         [np.nan, np.nan, 'val1', 'val1', 'val1', 'val2', 'val2', 'val2']),
         ("ffill", 1,
-         [np.nan, np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan]),
+         [np.nan, np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan]),
         ("bfill", None,
-         ['foo', 'foo', 'foo', 'bar', 'bar', 'bar', np.nan, np.nan]),
+         ['val1', 'val1', 'val1', 'val2', 'val2', 'val2', np.nan, np.nan]),
         ("bfill", 1,
-         [np.nan, 'foo', 'foo', np.nan, 'bar', 'bar', np.nan, np.nan])
+         [np.nan, 'val1', 'val1', np.nan, 'val2', 'val2', np.nan, np.nan])
     ])
-    def test_group_fill_methods(self, as_series, fill_method, limit, exp_vals):
-        vals = [np.nan, np.nan, 'foo', np.nan, np.nan, 'bar', np.nan, np.nan]
-        keys = ['a'] * len(vals) + ['b'] * len(vals)
-        df = DataFrame({'key': keys, 'val': vals * 2})
-
+    def test_group_fill_methods(self, mix_groupings, as_series, val1, val2,
+                                fill_method, limit, exp_vals):
+        vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan]
+        _exp_vals = list(exp_vals)
+        # Overwrite placeholder values
+        for index, exp_val in enumerate(_exp_vals):
+            if exp_val == 'val1':
+                _exp_vals[index] = val1
+            elif exp_val == 'val2':
+                _exp_vals[index] = val2
+
+        # Need to modify values and expectations depending on the
+        # Series / DataFrame that we ultimately want to generate
+        if mix_groupings:  # ['a', 'b', 'a, 'b', ...]
+            keys = ['a', 'b'] * len(vals)
+
+            def interweave(list_obj):
+                temp = list()
+                for x in list_obj:
+                    temp.extend([x, x])
+
+                return temp
+
+            _exp_vals = interweave(_exp_vals)
+            vals = interweave(vals)
+        else:  # ['a', 'a', 'a', ... 'b', 'b', 'b']
+            keys = ['a'] * len(vals) + ['b'] * len(vals)
+            _exp_vals = _exp_vals * 2
+            vals = vals * 2
+
+        df = DataFrame({'key': keys, 'val': vals})
         if as_series:
             result = getattr(
                 df.groupby('key')['val'], fill_method)(limit=limit)
-            exp = Series(exp_vals * 2, name='val')
+            exp = Series(_exp_vals, name='val')
             assert_series_equal(result, exp)
         else:
             result = getattr(df.groupby('key'), fill_method)(limit=limit)
-            exp = DataFrame({'key': keys, 'val': exp_vals * 2})
+            exp = DataFrame({'key': keys, 'val': _exp_vals})
             assert_frame_equal(result, exp)
 
     def test_dont_clobber_name_column(self):

From 16c1823b5ee368ef72e86cae370e15c9c7e5be95 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 13 Feb 2018 15:00:50 -0800
Subject: [PATCH 07/25] Simplified groupby Cython calls for ffill/bfill

---
 pandas/_libs/groupby_helper.pxi.in |  55 ++++++++++++
 pandas/core/groupby.py             | 140 +++++++++++------------------
 2 files changed, 106 insertions(+), 89 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 13dfaaee1b3e7..af9d6b926f23f 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -1023,3 +1023,58 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
                     out[ii] = -1
 
                 label_indexer[lab, idxer_slot] = ii
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_fillna_indexer(ndarray[int64_t] out,
+                         ndarray[uint8_t] mask,
+                         ndarray[int64_t] labels,
+                         object method,
+                         int64_t limit):
+    """Fills values forwards or backwards within a group
+
+    Parameters
+    ----------
+    out : array of int64_t values which this method will write its results to
+        Missing values will be written to with a value of -1
+    mask : array of int64_t values where a 1 indicates a missing value
+    labels : array containing unique label for each group, with its ordering
+        matching up to the corresponding record in `values`
+    method : {'ffill', 'bfill'}
+        Direction for fill to be applied (forwards or backwards, respectively)
+    limit : Consecutive values to fill before stopping, or -1 for no limit
+
+    Notes
+    -----
+    This method modifies the `out` parameter rather than returning an object
+    """
+    cdef:
+        Py_ssize_t i, N
+        ndarray[int64_t] sorted_labels
+        int64_t curr_fill_idx=-1
+        int64_t idx, filled_vals=0
+
+    N = len(out)
+
+    sorted_labels = np.argsort(labels)
+    if method == 'bfill':
+        sorted_labels = sorted_labels[::-1]
+
+    with nogil:
+        for i in range(N):
+            idx = sorted_labels[i]
+            if mask[idx] == 1:  # is missing
+                # Stop filling once we've hit the limit
+                if filled_vals >= limit and limit != -1:
+                    curr_fill_idx = -1
+                filled_vals += 1
+            else:  # reset items when not missing
+                filled_vals = 0
+                curr_fill_idx = idx
+
+            out[idx] = curr_fill_idx
+            # If we move to the next group, reset
+            # the fill_idx and counter
+            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
+                curr_fill_idx = -1
+                filled_vals = 0
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index e36cd78a02181..5c434d9546b4a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -38,7 +38,7 @@
     _ensure_float)
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.generic import ABCSeries
-from pandas.core.dtypes.missing import isna, notna, _maybe_fill
+from pandas.core.dtypes.missing import isna, isnull, notna, _maybe_fill
 
 from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
                               DataError, SpecificationError)
@@ -877,28 +877,21 @@ def apply(self, func, *args, **kwargs):
 
         func = self._is_builtin_func(func)
 
-        # Try to go down the Cython path first
-        try:
-            f = self.grouper._cython_functions['apply'][func]
-            return self.grouper._cython_apply(f, self._selected_obj, self.axis,
-                                              **kwargs)
-        except KeyError:
-            # this is needed so we don't try and wrap strings. If we could
-            # resolve functions to their callable functions prior, this
-            # wouldn't be needed
-            if args or kwargs:
-                if callable(func):
-
-                    @wraps(func)
-                    def f(g):
-                        with np.errstate(all='ignore'):
-                            return func(g, *args, **kwargs)
-                else:
-                    raise ValueError('func must be a callable if args or '
-                                     'kwargs are supplied and func is not '
-                                     'implemented in Cython')
+        # this is needed so we don't try and wrap strings. If we could
+        # resolve functions to their callable functions prior, this
+        # wouldn't be needed
+        if args or kwargs:
+            if callable(func):
+
+                @wraps(func)
+                def f(g):
+                    with np.errstate(all='ignore'):
+                        return func(g, *args, **kwargs)
             else:
-                f = func
+                raise ValueError('func must be a callable if args or '
+                                 'kwargs are supplied')
+        else:
+            f = func
 
         # ignore SettingWithCopy here in case the user mutates
         with option_context('mode.chained_assignment', None):
@@ -1464,6 +1457,25 @@ def expanding(self, *args, **kwargs):
         from pandas.core.window import ExpandingGroupby
         return ExpandingGroupby(self, *args, **kwargs)
 
+    def _fill(self, how, limit=None):
+        labels, _, _ = self.grouper.group_info
+
+        # Need int value for Cython
+        if limit is None:
+            limit = -1
+        output = {}
+        if type(self) is DataFrameGroupBy:
+            for nm in self.grouper.names:
+                output[nm] = self.obj[nm].values
+        for name, obj in self._iterate_slices():
+            indexer = np.zeros_like(labels)
+            mask = isnull(obj.values).view(np.uint8)
+            libgroupby.group_fillna_indexer(indexer, mask, labels, how,
+                                            limit)
+            output[name] = algorithms.take_nd(obj.values, indexer)
+
+        return self._wrap_transformed_output(output)
+
     @Substitution(name='groupby')
     def pad(self, limit=None):
         """
@@ -1481,7 +1493,7 @@ def pad(self, limit=None):
         Series.fillna
         DataFrame.fillna
         """
-        return self.apply('ffill', limit=limit)
+        return self._fill('ffill', limit=limit)
     ffill = pad
 
     @Substitution(name='groupby')
@@ -1501,7 +1513,7 @@ def backfill(self, limit=None):
         Series.fillna
         DataFrame.fillna
         """
-        return self.apply('bfill', limit=limit)
+        return self._fill('bfill', limit=limit)
     bfill = backfill
 
     @Substitution(name='groupby')
@@ -2041,38 +2053,6 @@ def _get_group_keys(self):
                                           self.levels,
                                           self.labels)
 
-    def _cython_apply(self, ftype, data, axis, **kwargs):
-        def _generate_output(ser):
-            # duplicative of _get_cython_function; needs refactor
-            dtype_str = ser.dtype.name
-            values = ser.values[:, None]
-            func = afunc = self._get_func(ftype['name'], dtype_str)
-            f = ftype.get('f')
-
-            def wrapper(*args, **kwargs):
-                return f(afunc, *args, **kwargs)
-
-            func = wrapper
-            labels, _, _ = self.group_info
-
-            result = _maybe_fill(np.empty_like(values, dtype=dtype_str),
-                                 fill_value=np.nan)
-            func(result, values, labels, **kwargs)
-
-            return result[:, 0]
-
-        # Using introspection to determine result; not ideal needs refactor
-        if type(data) is Series:
-            return Series(_generate_output(data), name=data.name)
-        else:
-            output = collections.OrderedDict()
-            for col in data.columns:
-                if col in self.names:
-                    output[col] = data[col].values
-                else:
-                    output[col] = _generate_output(data[col])
-            return DataFrame(output, index=data.index)
-
     def apply(self, f, data, axis=0):
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
@@ -2269,22 +2249,6 @@ def get_group_levels(self):
                     kwargs.get('na_option', 'keep')
                 )
             }
-        },
-        'apply': {
-            'ffill': {
-                'name': 'group_fillna',
-                'f': lambda func, a, b, c, **kwargs: func(
-                    a, b, c,
-                    'ffill', kwargs['limit'] if kwargs['limit'] else -1
-                )
-            },
-            'bfill': {
-                'name': 'group_fillna',
-                'f': lambda func, a, b, c, **kwargs: func(
-                    a, b, c,
-                    'bfill', kwargs['limit'] if kwargs['limit'] else -1
-                )
-            }
         }
     }
 
@@ -2303,28 +2267,27 @@ def _is_builtin_func(self, arg):
         """
         return SelectionMixin._builtin_table.get(arg, arg)
 
-    def _get_func(self, fname, dtype_str=None, is_numeric=False):
-        # see if there is a fused-type version of function
-        # only valid for numeric
-        f = getattr(libgroupby, fname, None)
-        if f is not None and is_numeric:
-            return f
-
-        # otherwise find dtype-specific version, falling back to object
-        for dt in [dtype_str, 'object']:
-            f = getattr(libgroupby, "%s_%s" % (fname, dtype_str), None)
-            if f is not None:
-                return f
-
     def _get_cython_function(self, kind, how, values, is_numeric):
 
         dtype_str = values.dtype.name
 
+        def get_func(fname):
+            # see if there is a fused-type version of function
+            # only valid for numeric
+            f = getattr(libgroupby, fname, None)
+            if f is not None and is_numeric:
+                return f
+
+            # otherwise find dtype-specific version, falling back to object
+            for dt in [dtype_str, 'object']:
+                f = getattr(libgroupby, "%s_%s" % (fname, dt), None)
+                if f is not None:
+                    return f
+
         ftype = self._cython_functions[kind][how]
 
         if isinstance(ftype, dict):
-            func = afunc = self._get_func(ftype['name'], dtype_str=dtype_str,
-                                          is_numeric=is_numeric)
+            func = afunc = get_func(ftype['name'])
 
             # a sub-function
             f = ftype.get('f')
@@ -2337,8 +2300,7 @@ def wrapper(*args, **kwargs):
                 func = wrapper
 
         else:
-            func = self._get_func(ftype, dtype_str=dtype_str,
-                                  is_numeric=is_numeric)
+            func = get_func(ftype)
 
         if func is None:
             raise NotImplementedError("function is not implemented for this"

From bd3d5e0f5f079305fb1f495690e8ed57f27e5ae4 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 13 Feb 2018 15:18:58 -0800
Subject: [PATCH 08/25] Removed abandoned Cython implementation

---
 pandas/_libs/groupby_helper.pxi.in | 71 +-----------------------------
 1 file changed, 1 insertion(+), 70 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index af9d6b926f23f..4d005a23a2e03 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -273,7 +273,7 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 {{endfor}}
 
 #----------------------------------------------------------------------
-# group_nth, group_last, group_rank, group_fillna
+# group_nth, group_last, group_rank
 #----------------------------------------------------------------------
 
 {{py:
@@ -574,75 +574,6 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
             for i in range(N):
                 out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def group_fillna_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
-                          ndarray[{{c_type}}, ndim=2] values,
-                          ndarray[int64_t] labels,
-                          object method,
-                          int64_t limit):
-    """Fills values forwards or backwards within a group
-
-    Parameters
-    ----------
-    out : array of {{dest_type2}} values which this method will write its
-        results to
-    values : array of {{c_type}} values which may require filling
-    labels : array containing unique label for each group, with its ordering
-        matching up to the corresponding record in `values`
-    method : {'ffill', 'bfill'}
-        Direction for fill to be applied (forwards or backwards, respectively)
-    limit : Consecutive values to fill before stopping, or -1 for no limit
-
-    Notes
-    -----
-    This method modifies the `out` parameter rather than returning an object
-    """
-    cdef:
-        Py_ssize_t i, N
-        ndarray[uint8_t] mask
-        ndarray[int64_t] sorted_labels
-        {{dest_type2}} curr_fill_val = {{nan_val}}
-        int64_t idx, filled_vals=0
-
-    N, K = (<object> values).shape
-
-    {{if name=='int64'}}
-    mask = (values[:, 0] == {{nan_val}}).astype(np.uint8)
-    {{elif name=='object'}}
-    mask = np.array([x != x for x in values[:, 0]]).astype(np.uint8)
-    {{else}}
-    mask = np.isnan(values[:, 0]).astype(np.uint8)
-    {{endif}}
-
-    sorted_labels = np.argsort(labels)
-    if method == 'bfill':
-        sorted_labels = sorted_labels[::-1]
-
-    {{if name == 'object'}}
-    if True:  # make templating happy
-    {{else}}
-    with nogil:
-    {{endif}}
-        for i in range(N):
-            idx = sorted_labels[i]
-            if mask[idx]:  # is missing
-                if limit == -1 or filled_vals < limit:
-                    out[idx, 0] = curr_fill_val
-                else:
-                    out[idx, 0] == {{nan_val}}
-                filled_vals += 1
-            else:  # reset items when not missing
-                filled_vals = 0
-                curr_fill_val = values[idx, 0]
-                out[idx, 0] = values[idx, 0]
-
-            # If we move to the next group, reset
-            # the fill_val and counter
-            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
-                curr_fill_val = {{nan_val}}
-                filled_vals = 0
 {{endfor}}
 
 

From cae65af85fc9c4d4c00585ed82953db855092a0c Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 13 Feb 2018 15:21:15 -0800
Subject: [PATCH 09/25] Added upcast to int64 to prevent 32-bit failures

---
 pandas/_libs/groupby_helper.pxi.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 4d005a23a2e03..b4db6a08c8c45 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -987,7 +987,7 @@ def group_fillna_indexer(ndarray[int64_t] out,
 
     N = len(out)
 
-    sorted_labels = np.argsort(labels)
+    sorted_labels = np.argsort(labels).view(dtype=np.int64)
     if method == 'bfill':
         sorted_labels = sorted_labels[::-1]
 

From 02665142779c2f182b4376d9c46e069e554bd897 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 13 Feb 2018 22:30:51 -0800
Subject: [PATCH 10/25] Fixed issue with reconstructing grouped Series

---
 pandas/core/groupby.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 5c434d9546b4a..e55cb5b658150 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1465,8 +1465,9 @@ def _fill(self, how, limit=None):
             limit = -1
         output = {}
         if type(self) is DataFrameGroupBy:
-            for nm in self.grouper.names:
-                output[nm] = self.obj[nm].values
+            for grp in self.grouper.groupings:
+                ser = grp.group_index.take(grp.labels)
+                output[ser.name] = ser.values
         for name, obj in self._iterate_slices():
             indexer = np.zeros_like(labels)
             mask = isnull(obj.values).view(np.uint8)

From 50dc6906c37c83d7acdd254fbc86f61ba69c98dc Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Wed, 14 Feb 2018 16:23:51 -0800
Subject: [PATCH 11/25] Changed .view to .astype to avoid 32-bit segfaults

---
 pandas/_libs/groupby_helper.pxi.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index b4db6a08c8c45..9a4cb61e306aa 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -987,7 +987,7 @@ def group_fillna_indexer(ndarray[int64_t] out,
 
     N = len(out)
 
-    sorted_labels = np.argsort(labels).view(dtype=np.int64)
+    sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
     if method == 'bfill':
         sorted_labels = sorted_labels[::-1]
 

From 9fa8e255773a518f6f7623e88e92ad142ddcf797 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Wed, 14 Feb 2018 16:24:13 -0800
Subject: [PATCH 12/25] Added whatsnew

---
 doc/source/whatsnew/v0.23.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index fd3c3a5a7a301..44e5fa790fcf2 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -689,6 +689,7 @@ Performance Improvements
 - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
 - Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`)
 - Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
+- Improved performance of :func:`GroupBy.ffill` and :func:`GroupBy.bfill` (:issue:`11296`)
 
 .. _whatsnew_0230.docs:
 

From 5da06d868dc5cf044a84a32dcf74f0924d2b919d Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 10:50:33 -0800
Subject: [PATCH 13/25] Aligned group_fillna and group_shift signatures

---
 pandas/_libs/groupby_helper.pxi.in | 20 ++++---
 pandas/core/groupby.py             | 92 +++++++++++++++++++++---------
 2 files changed, 75 insertions(+), 37 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 9a4cb61e306aa..b0dca670eea8d 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -906,7 +906,7 @@ def group_cumsum(numeric[:, :] out,
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
+def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                         int ngroups, int periods):
     cdef:
         Py_ssize_t N, i, j, ii
@@ -957,21 +957,19 @@ def group_shift_indexer(int64_t[:] out, int64_t[:] labels,
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def group_fillna_indexer(ndarray[int64_t] out,
-                         ndarray[uint8_t] mask,
-                         ndarray[int64_t] labels,
-                         object method,
+def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
+                         ndarray[uint8_t] mask, object direction,
                          int64_t limit):
-    """Fills values forwards or backwards within a group
+    """Indexes how to fill values forwards or backwards within a group
 
     Parameters
     ----------
     out : array of int64_t values which this method will write its results to
         Missing values will be written to with a value of -1
-    mask : array of int64_t values where a 1 indicates a missing value
     labels : array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
-    method : {'ffill', 'bfill'}
+    mask : array of int64_t values where a 1 indicates a missing value
+    direction : {'ffill', 'bfill'}
         Direction for fill to be applied (forwards or backwards, respectively)
     limit : Consecutive values to fill before stopping, or -1 for no limit
 
@@ -987,8 +985,11 @@ def group_fillna_indexer(ndarray[int64_t] out,
 
     N = len(out)
 
+    # Make sure all arrays are the same size
+    assert N == len(labels) == len(mask)
+
     sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
-    if method == 'bfill':
+    if direction == 'bfill':
         sorted_labels = sorted_labels[::-1]
 
     with nogil:
@@ -1004,6 +1005,7 @@ def group_fillna_indexer(ndarray[int64_t] out,
                 curr_fill_idx = idx
 
             out[idx] = curr_fill_idx
+
             # If we move to the next group, reset
             # the fill_idx and counter
             if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index e55cb5b658150..d386966d7c8a1 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1,5 +1,5 @@
 import types
-from functools import wraps
+from functools import wraps, partial
 import numpy as np
 import datetime
 import collections
@@ -1457,25 +1457,14 @@ def expanding(self, *args, **kwargs):
         from pandas.core.window import ExpandingGroupby
         return ExpandingGroupby(self, *args, **kwargs)
 
-    def _fill(self, how, limit=None):
-        labels, _, _ = self.grouper.group_info
-
+    def _fill(self, direction, limit=None):
         # Need int value for Cython
         if limit is None:
             limit = -1
-        output = {}
-        if type(self) is DataFrameGroupBy:
-            for grp in self.grouper.groupings:
-                ser = grp.group_index.take(grp.labels)
-                output[ser.name] = ser.values
-        for name, obj in self._iterate_slices():
-            indexer = np.zeros_like(labels)
-            mask = isnull(obj.values).view(np.uint8)
-            libgroupby.group_fillna_indexer(indexer, mask, labels, how,
-                                            limit)
-            output[name] = algorithms.take_nd(obj.values, indexer)
 
-        return self._wrap_transformed_output(output)
+        return self._get_cythonized_result('group_fillna_indexer',
+                                           self.grouper, needs_mask=True,
+                                           direction=direction, limit=limit)
 
     @Substitution(name='groupby')
     def pad(self, limit=None):
@@ -1863,6 +1852,52 @@ def cummax(self, axis=0, **kwargs):
 
         return self._cython_transform('cummax', numeric_only=False)
 
+    def _get_cythonized_result(self, how, grouper, needs_mask=False,
+                               needs_ngroups=False, **kwargs):
+        """Get result for Cythonized functions
+
+        Parameters
+        ----------
+        how : str, Cythonized function name to be called
+        grouper : Grouper object containing pertinent group info
+        needs_mask : bool, default False
+            Whether boolean mask needs to be part of the Cython call signature
+        needs_ngroups : bool, default False
+            Whether number of groups part of the Cython call signature
+        **kwargs : dict
+            Extra arguments required for the given function. This method
+            internally stores an OrderedDict that maps those keywords to
+            positional arguments before calling the Cython layer
+
+        Returns
+        -------
+        GroupBy object populated with appropriate result(s)
+        """
+        exp_kwds = collections.OrderedDict([
+            (('group_fillna_indexer'), ('direction', 'limit')),
+            (('group_shift_indexer'), ('nperiods',))])
+
+        labels, _, ngroups = grouper.group_info
+        output = collections.OrderedDict()
+        base_func = getattr(libgroupby, how)
+
+        for name, obj in self._iterate_slices():
+            indexer = np.zeros_like(labels)
+            func = partial(base_func, indexer, labels)
+            if needs_mask:
+                mask = isnull(obj.values).astype(np.uint8, copy=False)
+                func = partial(func, mask)
+
+            if needs_ngroups:
+                func = partial(func, ngroups)
+
+            # Convert any keywords into positional arguments
+            func = partial(func, *(kwargs[x] for x in exp_kwds[how]))
+            func()  # Call func to modify indexer values in place
+            output[name] = algorithms.take_nd(obj.values, indexer)
+
+        return self._wrap_transformed_output(output)
+
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def shift(self, periods=1, freq=None, axis=0):
@@ -1880,17 +1915,10 @@ def shift(self, periods=1, freq=None, axis=0):
         if freq is not None or axis != 0:
             return self.apply(lambda x: x.shift(periods, freq, axis))
 
-        labels, _, ngroups = self.grouper.group_info
-
-        # filled in by Cython
-        indexer = np.zeros_like(labels)
-        libgroupby.group_shift_indexer(indexer, labels, ngroups, periods)
+        return self._get_cythonized_result('group_shift_indexer',
+                                           self.grouper, needs_ngroups=True,
+                                           nperiods=periods)
 
-        output = {}
-        for name, obj in self._iterate_slices():
-            output[name] = algorithms.take_nd(obj.values, indexer)
-
-        return self._wrap_transformed_output(output)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
@@ -3597,7 +3625,6 @@ def describe(self, **kwargs):
     def value_counts(self, normalize=False, sort=True, ascending=False,
                      bins=None, dropna=True):
 
-        from functools import partial
         from pandas.core.reshape.tile import cut
         from pandas.core.reshape.merge import _get_join_indexers
 
@@ -4605,9 +4632,18 @@ def _apply_to_column_groupbys(self, func):
              in self._iterate_column_groupbys()),
             keys=self._selected_obj.columns, axis=1)
 
+    def _fill(self, direction, limit=None):
+        """Overriden method to concat grouped columns in output"""
+        res = super()._fill(direction, limit=limit)
+        output = collections.OrderedDict()
+        for grp in self.grouper.groupings:
+            ser = grp.group_index.take(grp.labels)
+            output[ser.name] = ser.values
+
+        return self._wrap_transformed_output(output).join(res)
+
     def count(self):
         """ Compute count of group, excluding missing values """
-        from functools import partial
         from pandas.core.dtypes.missing import _isna_ndarraylike as isna
 
         data, _ = self._get_data_to_aggregate()

From 2fe91a4ac6309becb958e97c6b94e61e1dd2c9e2 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 14:09:53 -0800
Subject: [PATCH 14/25] Fixed failing test; list comp for _fill method

---
 pandas/core/groupby.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index d386966d7c8a1..bf78270f76845 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1882,10 +1882,10 @@ def _get_cythonized_result(self, how, grouper, needs_mask=False,
         base_func = getattr(libgroupby, how)
 
         for name, obj in self._iterate_slices():
-            indexer = np.zeros_like(labels)
+            indexer = np.zeros_like(labels, dtype=np.int64)
             func = partial(base_func, indexer, labels)
             if needs_mask:
-                mask = isnull(obj.values).astype(np.uint8, copy=False)
+                mask = isnull(obj.values).view(np.uint8)
                 func = partial(func, mask)
 
             if needs_ngroups:
@@ -4633,12 +4633,11 @@ def _apply_to_column_groupbys(self, func):
             keys=self._selected_obj.columns, axis=1)
 
     def _fill(self, direction, limit=None):
-        """Overriden method to concat grouped columns in output"""
+        """Overriden method to join grouped columns in output"""
         res = super()._fill(direction, limit=limit)
-        output = collections.OrderedDict()
-        for grp in self.grouper.groupings:
-            ser = grp.group_index.take(grp.labels)
-            output[ser.name] = ser.values
+        output = collections.OrderedDict(
+            (grp.name, grp.group_index.take(grp.labels)) for grp in
+             self.grouper.groupings)
 
         return self._wrap_transformed_output(output).join(res)
 

From 825ba172e22890eb7a4f08b4ae93d7af32cb9489 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 14:26:49 -0800
Subject: [PATCH 15/25] Updated whatsnew

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 44e5fa790fcf2..fcaf46b1c3d71 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -689,7 +689,7 @@ Performance Improvements
 - Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
 - Improved performance of :func:`DataFrameGroupBy.rank` (:issue:`15779`)
 - Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`)
-- Improved performance of :func:`GroupBy.ffill` and :func:`GroupBy.bfill` (:issue:`11296`)
+- Improved performance of ``GroupBy.ffill`` and ``GroupBy.bfill`` (:issue:`11296`)
 
 .. _whatsnew_0230.docs:
 

From 127c71c1afbec3e33391b74e52cf9da77efa569d Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 14:28:58 -0800
Subject: [PATCH 16/25] PEP8 fixes

---
 pandas/core/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index bf78270f76845..1c67fabdd3375 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4637,7 +4637,7 @@ def _fill(self, direction, limit=None):
         res = super()._fill(direction, limit=limit)
         output = collections.OrderedDict(
             (grp.name, grp.group_index.take(grp.labels)) for grp in
-             self.grouper.groupings)
+            self.grouper.groupings)
 
         return self._wrap_transformed_output(output).join(res)
 

From 3a23cd6afa3cba4980b5a5c9e4a7028949b12e93 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 17:32:33 -0800
Subject: [PATCH 17/25] Py27 support with super call

---
 pandas/core/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 1c67fabdd3375..0f4bb16c01a56 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4634,7 +4634,7 @@ def _apply_to_column_groupbys(self, func):
 
     def _fill(self, direction, limit=None):
         """Overriden method to join grouped columns in output"""
-        res = super()._fill(direction, limit=limit)
+        res = super(DataFrameGroupBy, self)._fill(direction, limit=limit)
         output = collections.OrderedDict(
             (grp.name, grp.group_index.take(grp.labels)) for grp in
             self.grouper.groupings)

From a363146213cca61d5d823d67c5d6df3eb0098957 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Mon, 19 Feb 2018 22:17:54 -0800
Subject: [PATCH 18/25] Fixed LINT issue

---
 pandas/core/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 0f4bb16c01a56..d3e9f88272c71 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1919,7 +1919,6 @@ def shift(self, periods=1, freq=None, axis=0):
                                            self.grouper, needs_ngroups=True,
                                            nperiods=periods)
 
-
     @Substitution(name='groupby')
     @Appender(_doc_template)
     def head(self, n=5):

From fd513c8e9d77e4fd171a39e62ac945a03bf60633 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 20 Feb 2018 10:22:50 -0800
Subject: [PATCH 19/25] Used kwargs to call Cython groupby funcs

---
 pandas/_libs/groupby_helper.pxi.in | 14 ++++++++------
 pandas/core/groupby.py             | 13 +++----------
 2 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index b0dca670eea8d..296106def7c1f 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -907,14 +907,16 @@ def group_cumsum(numeric[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                        int ngroups, int periods):
+                        int64_t ngroups, **kwargs):
     cdef:
         Py_ssize_t N, i, j, ii
-        int offset, sign
+        int offset, sign, periods
         int64_t lab, idxer, idxer_slot
         int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
         int64_t[:, :] label_indexer
 
+    periods = kwargs['periods']
+
     N, = (<object> labels).shape
 
     if periods < 0:
@@ -958,8 +960,7 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                         ndarray[uint8_t] mask, object direction,
-                         int64_t limit):
+                         ndarray[uint8_t] mask, **kwargs):
     """Indexes how to fill values forwards or backwards within a group
 
     Parameters
@@ -980,9 +981,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     cdef:
         Py_ssize_t i, N
         ndarray[int64_t] sorted_labels
-        int64_t curr_fill_idx=-1
-        int64_t idx, filled_vals=0
+        int64_t limit, idx, curr_fill_idx=-1, filled_vals=0
 
+    direction = kwargs['direction']
+    limit = kwargs['limit']
     N = len(out)
 
     # Make sure all arrays are the same size
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index d3e9f88272c71..044c7d5f31772 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1865,17 +1865,12 @@ def _get_cythonized_result(self, how, grouper, needs_mask=False,
         needs_ngroups : bool, default False
             Whether number of groups part of the Cython call signature
         **kwargs : dict
-            Extra arguments required for the given function. This method
-            internally stores an OrderedDict that maps those keywords to
-            positional arguments before calling the Cython layer
+            Extra arguments to be passed back to Cython funcs
 
         Returns
         -------
         GroupBy object populated with appropriate result(s)
         """
-        exp_kwds = collections.OrderedDict([
-            (('group_fillna_indexer'), ('direction', 'limit')),
-            (('group_shift_indexer'), ('nperiods',))])
 
         labels, _, ngroups = grouper.group_info
         output = collections.OrderedDict()
@@ -1891,9 +1886,7 @@ def _get_cythonized_result(self, how, grouper, needs_mask=False,
             if needs_ngroups:
                 func = partial(func, ngroups)
 
-            # Convert any keywords into positional arguments
-            func = partial(func, *(kwargs[x] for x in exp_kwds[how]))
-            func()  # Call func to modify indexer values in place
+            func(**kwargs)  # Call func to modify indexer values in place
             output[name] = algorithms.take_nd(obj.values, indexer)
 
         return self._wrap_transformed_output(output)
@@ -1917,7 +1910,7 @@ def shift(self, periods=1, freq=None, axis=0):
 
         return self._get_cythonized_result('group_shift_indexer',
                                            self.grouper, needs_ngroups=True,
-                                           nperiods=periods)
+                                           periods=periods)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)

From 776d1b7968526a5178ec009b0ec71c0c6f6468e9 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 20 Feb 2018 11:01:35 -0800
Subject: [PATCH 20/25] Docstring for _fill method

---
 pandas/core/groupby.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 044c7d5f31772..191fd92be893e 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1458,6 +1458,27 @@ def expanding(self, *args, **kwargs):
         return ExpandingGroupby(self, *args, **kwargs)
 
     def _fill(self, direction, limit=None):
+        """Shared function for `pad` and `backfill` to call Cython method
+
+        Parameters
+        ----------
+        direction : {'ffill', 'bfill'}
+            Direction passed to underlying Cython function. `bfill` will cause
+            values to be filled backwards. `ffill` and any other values will
+            default to a forward fill
+        limit : int, default None
+            Maximum number of consecutive values to fill. If `None`, this
+            method will convert to -1 prior to passing to Cython
+
+        Returns
+        -------
+        `Series` or `DataFrame` with filled values
+
+        See Also
+        --------
+        pad
+        backfill
+        """
         # Need int value for Cython
         if limit is None:
             limit = -1
@@ -1869,7 +1890,7 @@ def _get_cythonized_result(self, how, grouper, needs_mask=False,
 
         Returns
         -------
-        GroupBy object populated with appropriate result(s)
+        `Series` or `DataFrame`  with filled values
         """
 
         labels, _, ngroups = grouper.group_info

From 33f0d06840cbbf12e6836109212b64f8d9f8c889 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Tue, 20 Feb 2018 17:36:45 -0800
Subject: [PATCH 21/25] Cleaned up kwargs passing to Cython layer

---
 pandas/_libs/groupby_helper.pxi.in | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 296106def7c1f..c97db16d9d656 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -907,16 +907,14 @@ def group_cumsum(numeric[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                        int64_t ngroups, **kwargs):
+                        int ngroups, int periods):
     cdef:
         Py_ssize_t N, i, j, ii
-        int offset, sign, periods
+        int offset, sign
         int64_t lab, idxer, idxer_slot
         int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
         int64_t[:, :] label_indexer
 
-    periods = kwargs['periods']
-
     N, = (<object> labels).shape
 
     if periods < 0:
@@ -960,7 +958,8 @@ def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                         ndarray[uint8_t] mask, **kwargs):
+                         ndarray[uint8_t] mask, object direction,
+                         int64_t limit):
     """Indexes how to fill values forwards or backwards within a group
 
     Parameters
@@ -981,10 +980,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
     cdef:
         Py_ssize_t i, N
         ndarray[int64_t] sorted_labels
-        int64_t limit, idx, curr_fill_idx=-1, filled_vals=0
+        int64_t idx, curr_fill_idx=-1, filled_vals=0
 
-    direction = kwargs['direction']
-    limit = kwargs['limit']
     N = len(out)
 
     # Make sure all arrays are the same size

From 662008a6adfff8d21c48dbbc759cb511b6697daf Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Thu, 22 Feb 2018 08:12:09 -0800
Subject: [PATCH 22/25] Idiomatic update - replace join with concat

---
 pandas/core/groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 191fd92be893e..1d7e09048a03a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4652,7 +4652,8 @@ def _fill(self, direction, limit=None):
             (grp.name, grp.group_index.take(grp.labels)) for grp in
             self.grouper.groupings)
 
-        return self._wrap_transformed_output(output).join(res)
+        from pandas.core.reshape.concat import concat
+        return concat((self._wrap_transformed_output(output), res), axis=1)
 
     def count(self):
         """ Compute count of group, excluding missing values """

From 27e24fa36f3781c03dfe179c72f24c62179071ac Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Thu, 22 Feb 2018 18:03:13 -0800
Subject: [PATCH 23/25] Moved non-templated funcs to groupby.pyx

---
 pandas/_libs/groupby.pyx           | 216 ++++++++++++++++++++++++++++
 pandas/_libs/groupby_helper.pxi.in | 219 -----------------------------
 2 files changed, 216 insertions(+), 219 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 866683ce378ab..e3d208a915225 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -94,5 +94,221 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
     return a[k]
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_median_float64(ndarray[float64_t, ndim=2] out,
+                         ndarray[int64_t] counts,
+                         ndarray[float64_t, ndim=2] values,
+                         ndarray[int64_t] labels,
+                         Py_ssize_t min_count=-1):
+    """
+    Only aggregates on axis=0
+    """
+    cdef:
+        Py_ssize_t i, j, N, K, ngroups, size
+        ndarray[int64_t] _counts
+        ndarray data
+        float64_t* ptr
+
+    assert min_count == -1, "'min_count' only used in add and prod"
+
+    ngroups = len(counts)
+    N, K = (<object> values).shape
+
+    indexer, _counts = groupsort_indexer(labels, ngroups)
+    counts[:] = _counts[1:]
+
+    data = np.empty((K, N), dtype=np.float64)
+    ptr = <float64_t*> data.data
+
+    take_2d_axis1_float64_float64(values.T, indexer, out=data)
+
+    with nogil:
+
+        for i in range(K):
+            # exclude NA group
+            ptr += _counts[0]
+            for j in range(ngroups):
+                size = _counts[j + 1]
+                out[j, i] = median_linear(ptr, size)
+                ptr += size
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_cumprod_float64(float64_t[:, :] out,
+                          float64_t[:, :] values,
+                          int64_t[:] labels,
+                          bint is_datetimelike):
+    """
+    Only transforms on axis=0
+    """
+    cdef:
+        Py_ssize_t i, j, N, K, size
+        float64_t val
+        float64_t[:, :] accum
+        int64_t lab
+
+    N, K = (<object> values).shape
+    accum = np.ones_like(values)
+
+    with nogil:
+        for i in range(N):
+            lab = labels[i]
+
+            if lab < 0:
+                continue
+            for j in range(K):
+                val = values[i, j]
+                if val == val:
+                    accum[lab, j] *= val
+                    out[i, j] = accum[lab, j]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_cumsum(numeric[:, :] out,
+                 numeric[:, :] values,
+                 int64_t[:] labels,
+                 is_datetimelike):
+    """
+    Only transforms on axis=0
+    """
+    cdef:
+        Py_ssize_t i, j, N, K, size
+        numeric val
+        numeric[:, :] accum
+        int64_t lab
+
+    N, K = (<object> values).shape
+    accum = np.zeros_like(values)
+
+    with nogil:
+        for i in range(N):
+            lab = labels[i]
+
+            if lab < 0:
+                continue
+            for j in range(K):
+                val = values[i, j]
+
+                if numeric == float32_t or numeric == float64_t:
+                    if val == val:
+                        accum[lab, j] += val
+                        out[i, j] = accum[lab, j]
+                else:
+                    accum[lab, j] += val
+                    out[i, j] = accum[lab, j]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
+                        int ngroups, int periods):
+    cdef:
+        Py_ssize_t N, i, j, ii
+        int offset, sign
+        int64_t lab, idxer, idxer_slot
+        int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
+        int64_t[:, :] label_indexer
+
+    N, = (<object> labels).shape
+
+    if periods < 0:
+        periods = -periods
+        offset = N - 1
+        sign = -1
+    elif periods > 0:
+        offset = 0
+        sign = 1
+
+    if periods == 0:
+        with nogil:
+            for i in range(N):
+                out[i] = i
+    else:
+        # array of each previous indexer seen
+        label_indexer = np.zeros((ngroups, periods), dtype=np.int64)
+        with nogil:
+            for i in range(N):
+                ## reverse iterator if shifting backwards
+                ii = offset + sign * i
+                lab = labels[ii]
+
+                # Skip null keys
+                if lab == -1:
+                    out[ii] = -1
+                    continue
+
+                label_seen[lab] += 1
+
+                idxer_slot = label_seen[lab] % periods
+                idxer = label_indexer[lab, idxer_slot]
+
+                if label_seen[lab] > periods:
+                    out[ii] = idxer
+                else:
+                    out[ii] = -1
+
+                label_indexer[lab, idxer_slot] = ii
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
+                         ndarray[uint8_t] mask, object direction,
+                         int64_t limit):
+    """Indexes how to fill values forwards or backwards within a group
+
+    Parameters
+    ----------
+    out : array of int64_t values which this method will write its results to
+        Positions with no value to fill from are written as -1
+    labels : array containing unique label for each group, with its ordering
+        matching up to the corresponding record in `out` and `mask`
+    mask : array of uint8_t values where a 1 indicates a missing value
+    direction : {'ffill', 'bfill'}
+        Direction for fill to be applied (forwards or backwards, respectively)
+    limit : Consecutive values to fill before stopping, or -1 for no limit
+
+    Notes
+    -----
+    This method modifies the `out` parameter rather than returning an object
+    """
+    cdef:
+        Py_ssize_t i, N
+        ndarray[int64_t] sorted_labels
+        int64_t idx, curr_fill_idx=-1, filled_vals=0
+
+    N = len(out)
+
+    # Make sure all arrays are the same size
+    assert N == len(labels) == len(mask)
+
+    sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
+    if direction == 'bfill':
+        sorted_labels = sorted_labels[::-1]
+
+    with nogil:
+        for i in range(N):
+            idx = sorted_labels[i]
+            if mask[idx] == 1:  # is missing
+                # Stop filling once we've hit the limit
+                if filled_vals >= limit and limit != -1:
+                    curr_fill_idx = -1
+                filled_vals += 1
+            else:  # reset items when not missing
+                filled_vals = 0
+                curr_fill_idx = idx
+
+            out[idx] = curr_fill_idx
+
+            # If we move to the next group, reset
+            # the fill_idx and counter
+            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
+                curr_fill_idx = -1
+                filled_vals = 0
+
+
 # generated from template
 include "groupby_helper.pxi"
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index c97db16d9d656..de802f4a72277 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -791,222 +791,3 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                     out[i, j] = mval
 
 {{endfor}}
-
-#----------------------------------------------------------------------
-# other grouping functions not needing a template
-#----------------------------------------------------------------------
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_median_float64(ndarray[float64_t, ndim=2] out,
-                         ndarray[int64_t] counts,
-                         ndarray[float64_t, ndim=2] values,
-                         ndarray[int64_t] labels,
-                         Py_ssize_t min_count=-1):
-    """
-    Only aggregates on axis=0
-    """
-    cdef:
-        Py_ssize_t i, j, N, K, ngroups, size
-        ndarray[int64_t] _counts
-        ndarray data
-        float64_t* ptr
-
-    assert min_count == -1, "'min_count' only used in add and prod"
-
-    ngroups = len(counts)
-    N, K = (<object> values).shape
-
-    indexer, _counts = groupsort_indexer(labels, ngroups)
-    counts[:] = _counts[1:]
-
-    data = np.empty((K, N), dtype=np.float64)
-    ptr = <float64_t*> data.data
-
-    take_2d_axis1_float64_float64(values.T, indexer, out=data)
-
-    with nogil:
-
-        for i in range(K):
-            # exclude NA group
-            ptr += _counts[0]
-            for j in range(ngroups):
-                size = _counts[j + 1]
-                out[j, i] = median_linear(ptr, size)
-                ptr += size
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_cumprod_float64(float64_t[:, :] out,
-                          float64_t[:, :] values,
-                          int64_t[:] labels,
-                          bint is_datetimelike):
-    """
-    Only transforms on axis=0
-    """
-    cdef:
-        Py_ssize_t i, j, N, K, size
-        float64_t val
-        float64_t[:, :] accum
-        int64_t lab
-
-    N, K = (<object> values).shape
-    accum = np.ones_like(values)
-
-    with nogil:
-        for i in range(N):
-            lab = labels[i]
-
-            if lab < 0:
-                continue
-            for j in range(K):
-                val = values[i, j]
-                if val == val:
-                    accum[lab, j] *= val
-                    out[i, j] = accum[lab, j]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_cumsum(numeric[:, :] out,
-                 numeric[:, :] values,
-                 int64_t[:] labels,
-                 is_datetimelike):
-    """
-    Only transforms on axis=0
-    """
-    cdef:
-        Py_ssize_t i, j, N, K, size
-        numeric val
-        numeric[:, :] accum
-        int64_t lab
-
-    N, K = (<object> values).shape
-    accum = np.zeros_like(values)
-
-    with nogil:
-        for i in range(N):
-            lab = labels[i]
-
-            if lab < 0:
-                continue
-            for j in range(K):
-                val = values[i, j]
-
-                if numeric == float32_t or numeric == float64_t:
-                    if val == val:
-                        accum[lab, j] += val
-                        out[i, j] = accum[lab, j]
-                else:
-                    accum[lab, j] += val
-                    out[i, j] = accum[lab, j]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                        int ngroups, int periods):
-    cdef:
-        Py_ssize_t N, i, j, ii
-        int offset, sign
-        int64_t lab, idxer, idxer_slot
-        int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
-        int64_t[:, :] label_indexer
-
-    N, = (<object> labels).shape
-
-    if periods < 0:
-        periods = -periods
-        offset = N - 1
-        sign = -1
-    elif periods > 0:
-        offset = 0
-        sign = 1
-
-    if periods == 0:
-        with nogil:
-            for i in range(N):
-                out[i] = i
-    else:
-        # array of each previous indexer seen
-        label_indexer = np.zeros((ngroups, periods), dtype=np.int64)
-        with nogil:
-            for i in range(N):
-                ## reverse iterator if shifting backwards
-                ii = offset + sign * i
-                lab = labels[ii]
-
-                # Skip null keys
-                if lab == -1:
-                    out[ii] = -1
-                    continue
-
-                label_seen[lab] += 1
-
-                idxer_slot = label_seen[lab] % periods
-                idxer = label_indexer[lab, idxer_slot]
-
-                if label_seen[lab] > periods:
-                    out[ii] = idxer
-                else:
-                    out[ii] = -1
-
-                label_indexer[lab, idxer_slot] = ii
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
-                         ndarray[uint8_t] mask, object direction,
-                         int64_t limit):
-    """Indexes how to fill values forwards or backwards within a group
-
-    Parameters
-    ----------
-    out : array of int64_t values which this method will write its results to
-        Missing values will be written to with a value of -1
-    labels : array containing unique label for each group, with its ordering
-        matching up to the corresponding record in `values`
-    mask : array of int64_t values where a 1 indicates a missing value
-    direction : {'ffill', 'bfill'}
-        Direction for fill to be applied (forwards or backwards, respectively)
-    limit : Consecutive values to fill before stopping, or -1 for no limit
-
-    Notes
-    -----
-    This method modifies the `out` parameter rather than returning an object
-    """
-    cdef:
-        Py_ssize_t i, N
-        ndarray[int64_t] sorted_labels
-        int64_t idx, curr_fill_idx=-1, filled_vals=0
-
-    N = len(out)
-
-    # Make sure all arrays are the same size
-    assert N == len(labels) == len(mask)
-
-    sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
-    if direction == 'bfill':
-        sorted_labels = sorted_labels[::-1]
-
-    with nogil:
-        for i in range(N):
-            idx = sorted_labels[i]
-            if mask[idx] == 1:  # is missing
-                # Stop filling once we've hit the limit
-                if filled_vals >= limit and limit != -1:
-                    curr_fill_idx = -1
-                filled_vals += 1
-            else:  # reset items when not missing
-                filled_vals = 0
-                curr_fill_idx = idx
-
-            out[idx] = curr_fill_idx
-
-            # If we move to the next group, reset
-            # the fill_idx and counter
-            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
-                curr_fill_idx = -1
-                filled_vals = 0

From 6f72476674da2304ad4f8ba684a911c52936c559 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Fri, 23 Feb 2018 07:42:41 -0800
Subject: [PATCH 24/25] Code update - swap group_index.take with grouper

---
 pandas/core/groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 1d7e09048a03a..bd163b53b17be 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4649,8 +4649,7 @@ def _fill(self, direction, limit=None):
         """Overriden method to join grouped columns in output"""
         res = super(DataFrameGroupBy, self)._fill(direction, limit=limit)
         output = collections.OrderedDict(
-            (grp.name, grp.group_index.take(grp.labels)) for grp in
-            self.grouper.groupings)
+            (grp.name, grp.grouper) for grp in self.grouper.groupings)
 
         from pandas.core.reshape.concat import concat
         return concat((self._wrap_transformed_output(output), res), axis=1)

From eff660370cceb2c26fef207ae444be9497d063a1 Mon Sep 17 00:00:00 2001
From: Will Ayd <william.ayd@icloud.com>
Date: Sat, 24 Feb 2018 08:33:52 -0800
Subject: [PATCH 25/25] Rebase and update import

---
 pandas/core/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index bd163b53b17be..852ad04cd8a2e 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -4651,7 +4651,7 @@ def _fill(self, direction, limit=None):
         output = collections.OrderedDict(
             (grp.name, grp.grouper) for grp in self.grouper.groupings)
 
-        from pandas.core.reshape.concat import concat
+        from pandas import concat
         return concat((self._wrap_transformed_output(output), res), axis=1)
 
     def count(self):