1
1
import types
2
- from functools import wraps
2
+ from functools import wraps, partial
3
3
import numpy as np
4
4
import datetime
5
5
import collections
@@ -1457,25 +1457,14 @@ def expanding(self, *args, **kwargs):
1457
1457
from pandas.core.window import ExpandingGroupby
1458
1458
return ExpandingGroupby(self, *args, **kwargs)
1459
1459
1460
def _fill(self, direction, limit=None):
    """Fill values group-wise by delegating to the Cython fill indexer.

    Parameters
    ----------
    direction : object
        Forwarded unchanged as the ``direction`` keyword of
        ``_get_cythonized_result``.
    limit : int, default None
        Forwarded as the ``limit`` keyword; ``None`` is sent as ``-1``.

    Returns
    -------
    Whatever ``self._get_cythonized_result`` returns for
    ``'group_fillna_indexer'``.
    """
    # Cython needs an int value, so a missing limit is encoded as -1
    cython_limit = -1 if limit is None else limit

    return self._get_cythonized_result(
        'group_fillna_indexer', self.grouper, needs_mask=True,
        direction=direction, limit=cython_limit)
1479
1468
1480
1469
@Substitution(name='groupby')
1481
1470
def pad(self, limit=None):
@@ -1863,6 +1852,52 @@ def cummax(self, axis=0, **kwargs):
1863
1852
1864
1853
return self._cython_transform('cummax', numeric_only=False)
1865
1854
1855
def _get_cythonized_result(self, how, grouper, needs_mask=False,
                           needs_ngroups=False, **kwargs):
    """Get result for Cythonized functions

    Parameters
    ----------
    how : str
        Name of the Cythonized function, looked up on ``libgroupby``
    grouper : Grouper object
        Object whose ``group_info`` supplies the labels and the number
        of groups
    needs_mask : bool, default False
        Whether a null mask needs to be part of the Cython call signature
    needs_ngroups : bool, default False
        Whether the number of groups is part of the Cython call signature
    **kwargs : dict
        Extra arguments required for the given function. They are
        converted to positional arguments, in the order recorded for
        ``how``, before calling the Cython layer

    Returns
    -------
    The value of ``self._wrap_transformed_output`` applied to the
    per-slice values re-taken with the indexer filled in by Cython

    Raises
    ------
    AttributeError
        If ``libgroupby`` has no attribute named ``how``
    KeyError
        If ``how`` has no keyword specification below, or one of its
        required keywords is missing from ``kwargs``
    """
    # Keyword names each supported function expects, in positional order
    exp_kwds = {
        'group_fillna_indexer': ('direction', 'limit'),
        'group_shift_indexer': ('nperiods',),
    }

    labels, _, ngroups = grouper.group_info
    base_func = getattr(libgroupby, how)

    # Loop-invariant: convert the keywords into positional arguments once
    extra_args = tuple(kwargs[kwd] for kwd in exp_kwds[how])

    output = collections.OrderedDict()
    for name, obj in self._iterate_slices():
        indexer = np.zeros_like(labels)

        # NOTE(review): positional order is indexer, labels, [mask],
        # [ngroups], then the extras -- confirm the libgroupby
        # signatures use this exact order
        call_args = [indexer, labels]
        if needs_mask:
            call_args.append(
                isnull(obj.values).astype(np.uint8, copy=False))
        if needs_ngroups:
            call_args.append(ngroups)
        call_args.extend(extra_args)

        base_func(*call_args)  # Modifies indexer values in place
        output[name] = algorithms.take_nd(obj.values, indexer)

    return self._wrap_transformed_output(output)
1900
+
1866
1901
@Substitution(name='groupby')
1867
1902
@Appender(_doc_template)
1868
1903
def shift(self, periods=1, freq=None, axis=0):
@@ -1880,17 +1915,10 @@ def shift(self, periods=1, freq=None, axis=0):
1880
1915
if freq is not None or axis != 0:
1881
1916
return self.apply(lambda x: x.shift(periods, freq, axis))
1882
1917
1883
- labels, _, ngroups = self.grouper.group_info
1884
-
1885
- # filled in by Cython
1886
- indexer = np.zeros_like(labels)
1887
- libgroupby.group_shift_indexer(indexer, labels, ngroups, periods)
1918
+ return self._get_cythonized_result('group_shift_indexer',
1919
+ self.grouper, needs_ngroups=True,
1920
+ nperiods=periods)
1888
1921
1889
- output = {}
1890
- for name, obj in self._iterate_slices():
1891
- output[name] = algorithms.take_nd(obj.values, indexer)
1892
-
1893
- return self._wrap_transformed_output(output)
1894
1922
1895
1923
@Substitution(name='groupby')
1896
1924
@Appender(_doc_template)
@@ -3597,7 +3625,6 @@ def describe(self, **kwargs):
3597
3625
def value_counts(self, normalize=False, sort=True, ascending=False,
3598
3626
bins=None, dropna=True):
3599
3627
3600
- from functools import partial
3601
3628
from pandas.core.reshape.tile import cut
3602
3629
from pandas.core.reshape.merge import _get_join_indexers
3603
3630
@@ -4605,9 +4632,18 @@ def _apply_to_column_groupbys(self, func):
4605
4632
in self._iterate_column_groupbys()),
4606
4633
keys=self._selected_obj.columns, axis=1)
4607
4634
4635
def _fill(self, direction, limit=None):
    """Overridden method to concat grouped columns in output

    Runs the parent ``_fill`` first, then builds one column per entry
    of ``self.grouper.groupings`` (the group index taken at that
    grouping's labels) and joins the filled result onto those columns.
    """
    filled = super()._fill(direction, limit=limit)

    # One (name, values) pair per grouping, kept in grouping order
    expanded = (grp.group_index.take(grp.labels)
                for grp in self.grouper.groupings)
    group_cols = collections.OrderedDict(
        (ser.name, ser.values) for ser in expanded)

    return self._wrap_transformed_output(group_cols).join(filled)
4644
+
4608
4645
def count(self):
4609
4646
""" Compute count of group, excluding missing values """
4610
- from functools import partial
4611
4647
from pandas.core.dtypes.missing import _isna_ndarraylike as isna
4612
4648
4613
4649
data, _ = self._get_data_to_aggregate()
0 commit comments