Skip to content

Commit 0a6a1be

Browse files
committed
API: rolling.apply will pass Series to function
closes #5071
1 parent 4e6aa1c commit 0a6a1be

File tree

4 files changed

+228
-96
lines changed

4 files changed

+228
-96
lines changed

doc/source/whatsnew/v0.23.0.txt

+29-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,34 @@ The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtyp
6060
pd.get_dummies(df, columns=['c'], dtype=bool).dtypes
6161

6262

63+
.. _whatsnew_0230.enhancements.window_raw:
64+
65+
Rolling/Expanding.apply() accepts a ``raw`` keyword to pass a ``Series`` to the function
66+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
67+
68+
The :func:`Series.rolling`, :func:`DataFrame.rolling`, :func:`Series.expanding`, :func:`DataFrame.expanding` methods when used with ``.apply()`` have gained a ``raw=None`` parameter.
69+
This is similar to :func:`DataFrame.apply`. This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function. If ``False``, a ``Series`` will be passed. The
70+
default is ``None``, which preserves backward compatibility, so this will default to ``True``, sending an ``np.ndarray``.
71+
In a future version the default will be changed to ``False``, sending a ``Series``. (:issue:`5071`)
72+
73+
.. ipython:: python
74+
75+
s = pd.Series(np.arange(5), np.arange(5) + 1)
76+
s
77+
78+
Pass a ``Series``:
79+
80+
.. ipython:: python
81+
82+
s.rolling(2, min_periods=1).apply(lambda x: x.iloc[-1], raw=False)
83+
84+
Mimic the original behavior of passing an ndarray:
85+
86+
.. ipython:: python
87+
88+
s.rolling(2, min_periods=1).apply(lambda x: x[-1], raw=True)
89+
90+
6391
.. _whatsnew_0230.enhancements.merge_on_columns_and_levels:
6492

6593
Merging on a combination of columns and index levels
@@ -407,7 +435,7 @@ Other Enhancements
407435
- Updated ``to_gbq`` and ``read_gbq`` signature and documentation to reflect changes from
408436
the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ
409437
library. (:issue:`20564`)
410-
438+
411439
.. _whatsnew_0230.api_breaking:
412440

413441
Backwards incompatible API changes

pandas/_libs/window.pyx

+32-14
Original file line numberDiff line numberDiff line change
@@ -1432,39 +1432,44 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win,
14321432
return output
14331433

14341434

1435-
def roll_generic(ndarray[float64_t, cast=True] input,
1435+
def roll_generic(object obj,
14361436
int64_t win, int64_t minp, object index, object closed,
1437-
int offset, object func,
1437+
int offset, object func, bint raw,
14381438
object args, object kwargs):
14391439
cdef:
14401440
ndarray[double_t] output, counts, bufarr
1441+
ndarray[float64_t, cast=True] arr
14411442
float64_t *buf
14421443
float64_t *oldbuf
14431444
int64_t nobs = 0, i, j, s, e, N
14441445
bint is_variable
14451446
ndarray[int64_t] start, end
14461447

1447-
if not input.flags.c_contiguous:
1448-
input = input.copy('C')
1449-
1450-
n = len(input)
1448+
n = len(obj)
14511449
if n == 0:
1452-
return input
1450+
return obj
1451+
1452+
arr = np.asarray(obj)
1453+
1454+
# ndarray input
1455+
if raw:
1456+
if not arr.flags.c_contiguous:
1457+
arr = arr.copy('C')
14531458

1454-
counts = roll_sum(np.concatenate([np.isfinite(input).astype(float),
1459+
counts = roll_sum(np.concatenate([np.isfinite(arr).astype(float),
14551460
np.array([0.] * offset)]),
14561461
win, minp, index, closed)[offset:]
14571462

1458-
start, end, N, win, minp, is_variable = get_window_indexer(input, win,
1463+
start, end, N, win, minp, is_variable = get_window_indexer(arr, win,
14591464
minp, index,
14601465
closed,
14611466
floor=0)
14621467

14631468
output = np.empty(N, dtype=float)
14641469

14651470
if is_variable:
1471+
# variable window arr or series
14661472

1467-
# variable window
14681473
if offset != 0:
14691474
raise ValueError("unable to roll_generic with a non-zero offset")
14701475

@@ -1473,7 +1478,20 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14731478
e = end[i]
14741479

14751480
if counts[i] >= minp:
1476-
output[i] = func(input[s:e], *args, **kwargs)
1481+
if raw:
1482+
output[i] = func(arr[s:e], *args, **kwargs)
1483+
else:
1484+
output[i] = func(obj.iloc[s:e], *args, **kwargs)
1485+
else:
1486+
output[i] = NaN
1487+
1488+
elif not raw:
1489+
# series
1490+
for i from 0 <= i < N:
1491+
if counts[i] >= minp:
1492+
sl = slice(int_max(i + offset - win + 1, 0),
1493+
int_min(i + offset + 1, N))
1494+
output[i] = func(obj.iloc[sl], *args, **kwargs)
14771495
else:
14781496
output[i] = NaN
14791497

@@ -1482,12 +1500,12 @@ def roll_generic(ndarray[float64_t, cast=True] input,
14821500
# truncated windows at the beginning, through first full-length window
14831501
for i from 0 <= i < (int_min(win, N) - offset):
14841502
if counts[i] >= minp:
1485-
output[i] = func(input[0: (i + offset + 1)], *args, **kwargs)
1503+
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
14861504
else:
14871505
output[i] = NaN
14881506

14891507
# remaining full-length windows
1490-
buf = <float64_t *> input.data
1508+
buf = <float64_t *> arr.data
14911509
bufarr = np.empty(win, dtype=float)
14921510
oldbuf = <float64_t *> bufarr.data
14931511
for i from (win - offset) <= i < (N - offset):
@@ -1502,7 +1520,7 @@ def roll_generic(ndarray[float64_t, cast=True] input,
15021520
# truncated windows at the end
15031521
for i from int_max(N - offset, 0) <= i < N:
15041522
if counts[i] >= minp:
1505-
output[i] = func(input[int_max(i + offset - win + 1, 0): N],
1523+
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
15061524
*args,
15071525
**kwargs)
15081526
else:

pandas/core/window.py

+38-10
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def _center_window(self, result, window):
314314
def aggregate(self, arg, *args, **kwargs):
315315
result, how = self._aggregate(arg, *args, **kwargs)
316316
if result is None:
317-
return self.apply(arg, args=args, kwargs=kwargs)
317+
return self.apply(arg, raw=False, args=args, kwargs=kwargs)
318318
return result
319319

320320
agg = aggregate
@@ -955,22 +955,48 @@ def count(self):
955955
----------
956956
func : function
957957
Must produce a single value from an ndarray input if ``raw=True`` or a single value from a Series if ``raw=False``
958-
\*args and \*\*kwargs are passed to the function""")
958+
raw : bool, default None
959+
* ``False`` : passes each row or column as a Series to the
960+
function.
961+
* ``True`` or ``None`` : the passed function will receive ndarray
962+
objects instead.
963+
If you are just applying a NumPy reduction function this will
964+
achieve much better performance.
965+
.. versionadded:: 0.23.0
966+
967+
\*args and \*\*kwargs are passed to the function""")
968+
969+
def apply(self, func, raw=None, args=(), kwargs={}):
970+
from pandas import Series
959971

960-
def apply(self, func, args=(), kwargs={}):
961972
# TODO: _level is unused?
962973
_level = kwargs.pop('_level', None) # noqa
963974
window = self._get_window()
964975
offset = _offset(window, self.center)
965976
index, indexi = self._get_index()
966977

978+
# TODO: default is for backward compat
979+
# change to False in the future
980+
if raw is None:
981+
warnings.warn(
982+
"pass the raw keyword to remain backward compatible "
983+
"for .apply().\nIn the future, this will default to "
984+
"False, meaning a Series will be passed to the "
985+
"applied function. Not passing raw, defaults "
986+
"raw=True, meaning a ndarray is passed to the "
987+
"applied function", FutureWarning, stacklevel=3)
988+
raw = True
989+
967990
def f(arg, window, min_periods, closed):
968991
minp = _use_window(min_periods, window)
969-
return _window.roll_generic(arg, window, minp, indexi, closed,
970-
offset, func, args, kwargs)
992+
if not raw:
993+
arg = Series(arg, index=self.obj.index)
994+
return _window.roll_generic(
995+
arg, window, minp, indexi,
996+
closed, offset, func, raw, args, kwargs)
971997

972998
return self._apply(f, func, args=args, kwargs=kwargs,
973-
center=False)
999+
center=False, raw=raw)
9741000

9751001
def sum(self, *args, **kwargs):
9761002
nv.validate_window_func('sum', args, kwargs)
@@ -1498,8 +1524,9 @@ def count(self):
14981524
@Substitution(name='rolling')
14991525
@Appender(_doc_template)
15001526
@Appender(_shared_docs['apply'])
1501-
def apply(self, func, args=(), kwargs={}):
1502-
return super(Rolling, self).apply(func, args=args, kwargs=kwargs)
1527+
def apply(self, func, raw=None, args=(), kwargs={}):
1528+
return super(Rolling, self).apply(
1529+
func, raw=raw, args=args, kwargs=kwargs)
15031530

15041531
@Substitution(name='rolling')
15051532
@Appender(_shared_docs['sum'])
@@ -1756,8 +1783,9 @@ def count(self, **kwargs):
17561783
@Substitution(name='expanding')
17571784
@Appender(_doc_template)
17581785
@Appender(_shared_docs['apply'])
1759-
def apply(self, func, args=(), kwargs={}):
1760-
return super(Expanding, self).apply(func, args=args, kwargs=kwargs)
1786+
def apply(self, func, raw=None, args=(), kwargs={}):
1787+
return super(Expanding, self).apply(
1788+
func, raw=raw, args=args, kwargs=kwargs)
17611789

17621790
@Substitution(name='expanding')
17631791
@Appender(_shared_docs['sum'])

0 commit comments

Comments
 (0)