From 397678c33118f9d40d903a8912d5728ec3c04c33 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 13 Sep 2022 07:43:52 +0100 Subject: [PATCH 1/6] ENH: Improve Rolling, Expanding and EWM --- pandas-stubs/_typing.pyi | 3 + pandas-stubs/core/frame.pyi | 13 +- pandas-stubs/core/series.pyi | 13 +- pandas-stubs/core/window/ewm.pyi | 88 +++++++++--- pandas-stubs/core/window/expanding.pyi | 104 +++++++++++++- pandas-stubs/core/window/rolling.pyi | 191 +++++++++++++++++-------- tests/test_windowing.py | 30 ++++ 7 files changed, 347 insertions(+), 95 deletions(-) create mode 100644 tests/test_windowing.py diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 039a34e24..826e06333 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -249,4 +249,7 @@ FileWriteMode = Literal[ ] ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int] +# Windowing rank methods +WindowingRankType = Literal["average", "min", "max"] + __all__ = ["npt", "type_t"] diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index a68a4f589..7e669f20e 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -34,6 +34,10 @@ from pandas.core.indexing import ( ) from pandas.core.resample import Resampler from pandas.core.series import Series +from pandas.core.window import ( + Expanding, + ExponentialMovingWindow, +) from pandas.core.window.rolling import ( Rolling, Window, @@ -1412,8 +1416,13 @@ class DataFrame(NDFrame, OpsMixin): adjust: _bool = ..., ignore_na: _bool = ..., axis: AxisType = ..., - ) -> DataFrame: ... - def expanding(self, min_periods: int = ..., axis: AxisType = ...): ... # for now + ) -> ExponentialMovingWindow: ... + def expanding( + self, + min_periods: int = ..., + axis: AxisType = ..., + method: Literal["single", "table"] = ..., + ) -> Expanding: ... 
@overload def ffill( self, diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 855013634..44293a2c4 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -49,7 +49,11 @@ from pandas.core.indexing import ( ) from pandas.core.resample import Resampler from pandas.core.strings import StringMethods -from pandas.core.window import ExponentialMovingWindow +from pandas.core.window import ( + Expanding, + ExponentialMovingWindow, + Rolling, +) from pandas.core.window.rolling import ( Rolling, Window, @@ -1321,8 +1325,11 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): axis: SeriesAxisType = ..., ) -> ExponentialMovingWindow: ... def expanding( - self, min_periods: int = ..., axis: SeriesAxisType = ... - ) -> DataFrame: ... + self, + min_periods: int = ..., + axis: SeriesAxisType = ..., + method: Literal["single", "table"] = ..., + ) -> Expanding: ... def floordiv( self, other: num | _ListLike | Series[S1], diff --git a/pandas-stubs/core/window/ewm.pyi b/pandas-stubs/core/window/ewm.pyi index 42afcebc2..ff66ba60d 100644 --- a/pandas-stubs/core/window/ewm.pyi +++ b/pandas-stubs/core/window/ewm.pyi @@ -1,28 +1,72 @@ -from pandas.core.window.rolling import _Rolling +from typing import Any -class ExponentialMovingWindow(_Rolling): - obj = ... - com = ... - min_periods: int = ... - adjust = ... - ignore_na = ... - axis = ... - on = ... +import numpy as np +from pandas import ( + DataFrame, + Series, +) +from pandas.core.generic import NDFrame +from pandas.core.window.rolling import BaseWindow + +from pandas._typing import ( + Axis, + TimedeltaConvertibleTypes, +) + +class ExponentialMovingWindow(BaseWindow): + com: Any = ... # Incomplete + span: Any = ... # Incomplete + halflife: Any = ... # Incomplete + alpha: Any = ... # Incomplete + adjust: Any = ... # Incomplete + ignore_na: Any = ... # Incomplete + times: Any = ... 
# Incomplete def __init__( self, - obj, - com=..., - span=..., - halflife=..., - alpha=..., - min_periods: int = ..., + obj: NDFrame, + com: float | None = ..., + span: float | None = ..., + halflife: float | TimedeltaConvertibleTypes | None = ..., + alpha: float | None = ..., + min_periods: int | None = ..., adjust: bool = ..., ignore_na: bool = ..., - axis: int = ..., + axis: Axis = ..., + times: str | np.ndarray | NDFrame | None = ..., + method: str = ..., + *, + selection: Any | None = ..., ) -> None: ... - def mean(self, *args, **kwargs): ... - def std(self, bias: bool = ..., *args, **kwargs): ... - vol = ... - def var(self, bias: bool = ..., *args, **kwargs): ... - def cov(self, other=..., pairwise=..., bias: bool = ..., **kwargs): ... - def corr(self, other=..., pairwise=..., **kwargs): ... + def online(self, engine: str = ..., engine_kwargs: Any | None = ...): ... + def aggregate(self, func, *args, **kwargs): ... + agg = aggregate + def mean( + self, + *args, + engine: Any | None = ..., + engine_kwargs: Any | None = ..., + **kwargs, + ) -> Series | DataFrame: ... + def sum( + self, + *args, + engine: Any | None = ..., + engine_kwargs: Any | None = ..., + **kwargs, + ) -> Series | DataFrame: ... + def std(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... + def vol(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... + def var(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... + def cov( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + bias: bool = ..., + **kwargs, + ) -> Series | DataFrame: ... + def corr( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + **kwargs, + ) -> Series | DataFrame: ... 
diff --git a/pandas-stubs/core/window/expanding.pyi b/pandas-stubs/core/window/expanding.pyi index cf67b0bef..36cce7f36 100644 --- a/pandas-stubs/core/window/expanding.pyi +++ b/pandas-stubs/core/window/expanding.pyi @@ -7,13 +7,30 @@ from pandas import ( DataFrame, Series, ) -from pandas.core.window.rolling import _Rolling_and_Expanding +from pandas.core.generic import NDFrame +from pandas.core.window.rolling import ( + BaseWindowGroupby, + RollingAndExpandingMixin, +) + +from pandas._typing import ( + Axis as Axis, + WindowingRankType as WindowingRankType, +) -class Expanding(_Rolling_and_Expanding): +class Expanding(RollingAndExpandingMixin): def __init__( - self, obj, min_periods: int = ..., center: bool = ..., axis: int = ..., **kwargs + self, + obj: NDFrame, + min_periods: int = ..., + center: Any | None = ..., # Incomplete + axis: Axis = ..., + method: str = ..., + selection: Any | None = ..., # Incomplete ) -> None: ... - def count(self, **kwargs) -> DataFrame | Series: ... + def aggregate(self, func, *args, **kwargs): ... + agg = aggregate + def count(self): ... def apply( self, func: Callable[..., Any], @@ -23,5 +40,82 @@ class Expanding(_Rolling_and_Expanding): args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ): ... + def sum( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def max( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def min( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def mean( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def median( + self, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... 
+ def std( + self, + ddof: int = ..., + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def var( + self, + ddof: int = ..., + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def sem(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... + def skew(self, **kwargs) -> DataFrame | Series: ... + def kurt(self, **kwargs) -> DataFrame | Series: ... + def quantile( + self, quantile: float, interpolation: str = ..., **kwargs + ) -> DataFrame | Series: ... + def rank( + self, + method: WindowingRankType = ..., + ascending: bool = ..., + pct: bool = ..., + **kwargs, + ) -> DataFrame | Series: ... + def cov( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + ddof: int = ..., + **kwargs, + ) -> DataFrame | Series: ... + def corr( + self, + other: DataFrame | Series | None = ..., + pairwise: bool | None = ..., + ddof: int = ..., + **kwargs, + ) -> DataFrame | Series: ... -class ExpandingGroupby(Expanding): ... +class ExpandingGroupby(BaseWindowGroupby, Expanding): ... diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index 267bd7fec..a958fe36a 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -1,119 +1,184 @@ +from typing import ( + Any, + Callable, + Hashable, +) + import numpy as np from pandas import ( DataFrame, - Index, Series, ) -from pandas.core.base import ( - PandasObject, - SelectionMixin, -) +from pandas.core.base import SelectionMixin +from pandas.core.generic import NDFrame +from pandas.core.groupby.ops import BaseGrouper +from pandas.core.indexes.api import Index from pandas._typing import ( AggFuncType, Axis, Scalar, + WindowingRankType, ) -class _Window(PandasObject, SelectionMixin): - exclusions: set[str] = ... - obj = ... - on = ... - closed = ... - window = ... 
- min_periods: int = ... - center = ... - win_type: str = ... - win_freq = ... - axis = ... +class BaseWindow(SelectionMixin): + exclusions: frozenset[Hashable] + obj: Any = ... # Incomplete + on: Any = ... # Incomplete + closed: Any = ... # Incomplete + window: Any = ... # Incomplete + min_periods: Any = ... # Incomplete + center: Any = ... # Incomplete + axis: Any = ... # Incomplete + method: Any = ... # Incomplete def __init__( self, - obj, - window=..., + obj: NDFrame, + window: Any | None = ..., min_periods: int | None = ..., - center: bool | None = ..., + center: bool = ..., win_type: str | None = ..., axis: Axis = ..., on: str | Index | None = ..., closed: str | None = ..., - **kwargs, + method: str = ..., + *, + selection: Any | None = ..., ) -> None: ... @property - def is_datetimelike(self) -> bool | None: ... + def win_type(self): ... @property - def is_freq_type(self) -> bool: ... + def is_datetimelike(self) -> bool: ... def validate(self) -> None: ... def __getattr__(self, attr: str): ... def __iter__(self): ... def aggregate( - self, func: AggFuncType = ..., *args, **kwargs - ) -> Scalar | DataFrame | Series: ... - def agg( - self, func: AggFuncType = ..., *args, **kwargs + self, func: AggFuncType, *args, **kwargs ) -> Scalar | DataFrame | Series: ... + agg = aggregate -class Window(_Window): - def validate(self) -> None: ... - def sum(self, *args, **kwargs): ... - def mean(self, *args, **kwargs): ... - def var(self, ddof: int = ..., *args, **kwargs): ... - def std(self, ddof: int = ..., *args, **kwargs): ... +class BaseWindowGroupby(BaseWindow): + def __init__( + self, + obj: DataFrame | Series, + *args, + _grouper: BaseGrouper, + _as_index: bool = ..., + **kwargs, + ) -> None: ... -class _Rolling(_Window): ... +class Window(BaseWindow): + def aggregate( + self, func: AggFuncType, *args, **kwargs + ) -> Scalar | Series | DataFrame: ... + agg = aggregate + def sum(self, *args, **kwargs) -> DataFrame | Series: ... 
+ def mean(self, *args, **kwargs) -> DataFrame | Series: ... + def var(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... + def std(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... -class _Rolling_and_Expanding(_Rolling): +class RollingAndExpandingMixin(BaseWindow): def count(self) -> DataFrame | Series: ... def apply( self, - func, + func: Callable[..., Any], raw: bool = ..., - engine: str = ..., - engine_kwargs: dict | None = ..., - args: tuple | None = ..., - kwargs: dict | None = ..., + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + args: tuple[Any, ...] | None = ..., + kwargs: dict[str, Any] | None = ..., ): ... - def sum(self, *args, **kwargs) -> DataFrame | Series: ... - def max(self, *args, **kwargs) -> DataFrame | Series: ... - def min(self, *args, **kwargs) -> DataFrame | Series: ... - def mean(self, *args, **kwargs) -> DataFrame | Series: ... - def median(self, **kwargs) -> DataFrame | Series: ... - def std(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... - def var(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... + def sum( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def max( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def min( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def mean( + self, + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def median( + self, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... 
+ def std( + self, + ddof: int = ..., + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... + def var( + self, + ddof: int = ..., + *args, + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + **kwargs, + ) -> DataFrame | Series: ... def skew(self, **kwargs) -> DataFrame | Series: ... + def sem(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... def kurt(self, **kwargs) -> DataFrame | Series: ... def quantile( self, quantile: float, interpolation: str = ..., **kwargs ) -> DataFrame | Series: ... + def rank( + self, + method: WindowingRankType = ..., + ascending: bool = ..., + pct: bool = ..., + **kwargs, + ): ... def cov( self, - other: DataFrame | Series | np.ndarray | None = ..., + other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., **kwargs, ) -> DataFrame | Series: ... def corr( self, - other: DataFrame | Series | np.ndarray | None = ..., + other: DataFrame | Series | None = ..., pairwise: bool | None = ..., + ddof: int = ..., **kwargs, ) -> DataFrame | Series: ... -class Rolling(_Rolling_and_Expanding): - def is_datetimelike(self) -> bool: ... - win_freq = ... - window = ... - win_type: str = ... - min_periods: int = ... - def validate(self) -> None: ... - def count(self) -> DataFrame | Series: ... +class Rolling(RollingAndExpandingMixin): + min_periods: int + def aggregate(self, func, *args, **kwargs) -> Scalar | Series | DataFrame: ... + agg = aggregate + def count(self): ... def apply( self, - func, + func: Callable[..., Any], raw: bool = ..., - engine: str = ..., - engine_kwargs=..., - args=..., - kwargs=..., - ): ... + engine: str | None = ..., + engine_kwargs: dict[str, bool] | None = ..., + args: tuple[Any, ...] | None = ..., + kwargs: dict[str, Any] | None = ..., + ) -> Scalar | Series | DataFrame: ... -class RollingGroupby(Rolling): ... +class RollingGroupby(BaseWindowGroupby, Rolling): ... 
diff --git a/tests/test_windowing.py b/tests/test_windowing.py new file mode 100644 index 000000000..686937085 --- /dev/null +++ b/tests/test_windowing.py @@ -0,0 +1,30 @@ +from typing import Union + +import numpy as np +from pandas import ( + DataFrame, + Series, + date_range, +) +from typing_extensions import assert_type + +from tests import check + +IDX = date_range("1/1/2000", periods=700, freq="D") +S = Series(np.random.standard_normal(700)) +DF = DataFrame({"col1": S, "col2": S}) + + +def test_rolling(): + check(assert_type(S.rolling(10).mean(), Union[Series, DataFrame]), Series) + check(assert_type(DF.rolling(10).mean(), Union[Series, DataFrame]), DataFrame) + + +def test_expanding(): + check(assert_type(S.expanding().mean(), Union[Series, DataFrame]), Series) + check(assert_type(DF.expanding().mean(), Union[Series, DataFrame]), DataFrame) + + +def test_ewm(): + check(assert_type(S.ewm(span=10).mean(), Union[Series, DataFrame]), Series) + check(assert_type(DF.ewm(span=10).mean(), Union[Series, DataFrame]), DataFrame) From da3e116ef604533aa20acdc3437abab5fce49362 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 14 Sep 2022 06:32:00 +0100 Subject: [PATCH 2/6] ENH: Add generic --- pandas-stubs/core/frame.pyi | 8 ++-- pandas-stubs/core/series.pyi | 8 ++-- pandas-stubs/core/window/ewm.pyi | 19 +++++---- pandas-stubs/core/window/expanding.pyi | 35 +++++++-------- pandas-stubs/core/window/rolling.pyi | 59 +++++++++++++------------- tests/test_windowing.py | 14 +++--- 6 files changed, 72 insertions(+), 71 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 7e669f20e..c691b75ff 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1416,13 +1416,13 @@ class DataFrame(NDFrame, OpsMixin): adjust: _bool = ..., ignore_na: _bool = ..., axis: AxisType = ..., - ) -> ExponentialMovingWindow: ... + ) -> ExponentialMovingWindow[DataFrame]: ... 
def expanding( self, min_periods: int = ..., axis: AxisType = ..., method: Literal["single", "table"] = ..., - ) -> Expanding: ... + ) -> Expanding[DataFrame]: ... @overload def ffill( self, @@ -1779,7 +1779,7 @@ class DataFrame(NDFrame, OpsMixin): on: _str | None = ..., axis: AxisType = ..., closed: _str | None = ..., - ) -> Window: ... + ) -> Window[DataFrame]: ... @overload def rolling( self, @@ -1790,7 +1790,7 @@ class DataFrame(NDFrame, OpsMixin): on: _str | None = ..., axis: AxisType = ..., closed: _str | None = ..., - ) -> Rolling: ... + ) -> Rolling[DataFrame]: ... def rpow( self, other, diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 44293a2c4..3da1f10f2 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1323,13 +1323,13 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): adjust: _bool = ..., ignore_na: _bool = ..., axis: SeriesAxisType = ..., - ) -> ExponentialMovingWindow: ... + ) -> ExponentialMovingWindow[Series]: ... def expanding( self, min_periods: int = ..., axis: SeriesAxisType = ..., method: Literal["single", "table"] = ..., - ) -> Expanding: ... + ) -> Expanding[Series]: ... def floordiv( self, other: num | _ListLike | Series[S1], @@ -1532,7 +1532,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): on: _str | None = ..., axis: SeriesAxisType = ..., closed: _str | None = ..., - ) -> Window: ... + ) -> Window[Series]: ... @overload def rolling( self, @@ -1543,7 +1543,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): on: _str | None = ..., axis: SeriesAxisType = ..., closed: _str | None = ..., - ) -> Rolling: ... + ) -> Rolling[Series]: ... 
def rpow( self, other: Series[S1] | Scalar, diff --git a/pandas-stubs/core/window/ewm.pyi b/pandas-stubs/core/window/ewm.pyi index ff66ba60d..db980e281 100644 --- a/pandas-stubs/core/window/ewm.pyi +++ b/pandas-stubs/core/window/ewm.pyi @@ -10,10 +10,11 @@ from pandas.core.window.rolling import BaseWindow from pandas._typing import ( Axis, + NDFrameT, TimedeltaConvertibleTypes, ) -class ExponentialMovingWindow(BaseWindow): +class ExponentialMovingWindow(BaseWindow[NDFrameT]): com: Any = ... # Incomplete span: Any = ... # Incomplete halflife: Any = ... # Incomplete @@ -23,7 +24,7 @@ class ExponentialMovingWindow(BaseWindow): times: Any = ... # Incomplete def __init__( self, - obj: NDFrame, + obj: NDFrameT, com: float | None = ..., span: float | None = ..., halflife: float | TimedeltaConvertibleTypes | None = ..., @@ -46,27 +47,27 @@ class ExponentialMovingWindow(BaseWindow): engine: Any | None = ..., engine_kwargs: Any | None = ..., **kwargs, - ) -> Series | DataFrame: ... + ) -> NDFrameT: ... def sum( self, *args, engine: Any | None = ..., engine_kwargs: Any | None = ..., **kwargs, - ) -> Series | DataFrame: ... - def std(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... - def vol(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... - def var(self, bias: bool = ..., *args, **kwargs) -> Series | DataFrame: ... + ) -> NDFrameT: ... + def std(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... + def vol(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... + def var(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., bias: bool = ..., **kwargs, - ) -> Series | DataFrame: ... + ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., **kwargs, - ) -> Series | DataFrame: ... + ) -> NDFrameT: ... 
diff --git a/pandas-stubs/core/window/expanding.pyi b/pandas-stubs/core/window/expanding.pyi index 36cce7f36..38d36a2e3 100644 --- a/pandas-stubs/core/window/expanding.pyi +++ b/pandas-stubs/core/window/expanding.pyi @@ -14,11 +14,12 @@ from pandas.core.window.rolling import ( ) from pandas._typing import ( - Axis as Axis, - WindowingRankType as WindowingRankType, + Axis, + NDFrameT, + WindowingRankType, ) -class Expanding(RollingAndExpandingMixin): +class Expanding(RollingAndExpandingMixin[NDFrameT]): def __init__( self, obj: NDFrame, @@ -46,34 +47,34 @@ class Expanding(RollingAndExpandingMixin): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def max( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def min( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def mean( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def median( self, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def std( self, ddof: int = ..., @@ -81,7 +82,7 @@ class Expanding(RollingAndExpandingMixin): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def var( self, ddof: int = ..., @@ -89,33 +90,33 @@ class Expanding(RollingAndExpandingMixin): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... - def sem(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... - def skew(self, **kwargs) -> DataFrame | Series: ... - def kurt(self, **kwargs) -> DataFrame | Series: ... + ) -> NDFrameT: ... 
+ def sem(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... + def skew(self, **kwargs) -> NDFrameT: ... + def kurt(self, **kwargs) -> NDFrameT: ... def quantile( self, quantile: float, interpolation: str = ..., **kwargs - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def rank( self, method: WindowingRankType = ..., ascending: bool = ..., pct: bool = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... class ExpandingGroupby(BaseWindowGroupby, Expanding): ... diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index a958fe36a..666e04232 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -1,6 +1,7 @@ from typing import ( Any, Callable, + Generic, Hashable, ) @@ -10,18 +11,18 @@ from pandas import ( Series, ) from pandas.core.base import SelectionMixin -from pandas.core.generic import NDFrame from pandas.core.groupby.ops import BaseGrouper from pandas.core.indexes.api import Index from pandas._typing import ( AggFuncType, Axis, + NDFrameT, Scalar, WindowingRankType, ) -class BaseWindow(SelectionMixin): +class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): exclusions: frozenset[Hashable] obj: Any = ... # Incomplete on: Any = ... # Incomplete @@ -33,7 +34,7 @@ class BaseWindow(SelectionMixin): method: Any = ... # Incomplete def __init__( self, - obj: NDFrame, + obj: NDFrameT, window: Any | None = ..., min_periods: int | None = ..., center: bool = ..., @@ -57,28 +58,28 @@ class BaseWindow(SelectionMixin): ) -> Scalar | DataFrame | Series: ... 
agg = aggregate -class BaseWindowGroupby(BaseWindow): +class BaseWindowGroupby(BaseWindow[NDFrameT]): def __init__( self, - obj: DataFrame | Series, + obj: NDFrameT, *args, _grouper: BaseGrouper, _as_index: bool = ..., **kwargs, ) -> None: ... -class Window(BaseWindow): +class Window(BaseWindow[NDFrameT]): def aggregate( self, func: AggFuncType, *args, **kwargs ) -> Scalar | Series | DataFrame: ... agg = aggregate - def sum(self, *args, **kwargs) -> DataFrame | Series: ... - def mean(self, *args, **kwargs) -> DataFrame | Series: ... - def var(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... - def std(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... + def sum(self, *args, **kwargs) -> NDFrameT: ... + def mean(self, *args, **kwargs) -> NDFrameT: ... + def var(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... + def std(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... -class RollingAndExpandingMixin(BaseWindow): - def count(self) -> DataFrame | Series: ... +class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): + def count(self) -> NDFrameT: ... def apply( self, func: Callable[..., Any], @@ -94,34 +95,34 @@ class RollingAndExpandingMixin(BaseWindow): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def max( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def min( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def mean( self, *args, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def median( self, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... 
def std( self, ddof: int = ..., @@ -129,7 +130,7 @@ class RollingAndExpandingMixin(BaseWindow): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def var( self, ddof: int = ..., @@ -137,40 +138,40 @@ class RollingAndExpandingMixin(BaseWindow): engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., **kwargs, - ) -> DataFrame | Series: ... - def skew(self, **kwargs) -> DataFrame | Series: ... - def sem(self, ddof: int = ..., *args, **kwargs) -> DataFrame | Series: ... - def kurt(self, **kwargs) -> DataFrame | Series: ... + ) -> NDFrameT: ... + def skew(self, **kwargs) -> NDFrameT: ... + def sem(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... + def kurt(self, **kwargs) -> NDFrameT: ... def quantile( self, quantile: float, interpolation: str = ..., **kwargs - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def rank( self, method: WindowingRankType = ..., ascending: bool = ..., pct: bool = ..., **kwargs, - ): ... + ) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., **kwargs, - ) -> DataFrame | Series: ... + ) -> NDFrameT: ... -class Rolling(RollingAndExpandingMixin): +class Rolling(RollingAndExpandingMixin[NDFrameT]): min_periods: int def aggregate(self, func, *args, **kwargs) -> Scalar | Series | DataFrame: ... agg = aggregate - def count(self): ... + def count(self) -> NDFrameT: ... def apply( self, func: Callable[..., Any], @@ -181,4 +182,4 @@ class Rolling(RollingAndExpandingMixin): kwargs: dict[str, Any] | None = ..., ) -> Scalar | Series | DataFrame: ... -class RollingGroupby(BaseWindowGroupby, Rolling): ... +class RollingGroupby(BaseWindowGroupby[NDFrameT], Rolling): ... 
diff --git a/tests/test_windowing.py b/tests/test_windowing.py index 686937085..cb62e60a1 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -1,5 +1,3 @@ -from typing import Union - import numpy as np from pandas import ( DataFrame, @@ -16,15 +14,15 @@ def test_rolling(): - check(assert_type(S.rolling(10).mean(), Union[Series, DataFrame]), Series) - check(assert_type(DF.rolling(10).mean(), Union[Series, DataFrame]), DataFrame) + check(assert_type(S.rolling(10).mean(), Series), Series) + check(assert_type(DF.rolling(10).mean(), DataFrame), DataFrame) def test_expanding(): - check(assert_type(S.expanding().mean(), Union[Series, DataFrame]), Series) - check(assert_type(DF.expanding().mean(), Union[Series, DataFrame]), DataFrame) + check(assert_type(S.expanding().mean(), Series), Series) + check(assert_type(DF.expanding().mean(), DataFrame), DataFrame) def test_ewm(): - check(assert_type(S.ewm(span=10).mean(), Union[Series, DataFrame]), Series) - check(assert_type(DF.ewm(span=10).mean(), Union[Series, DataFrame]), DataFrame) + check(assert_type(S.ewm(span=10).mean(), Series), Series) + check(assert_type(DF.ewm(span=10).mean(), DataFrame), DataFrame) From cab50fab448f7474214087fed96236107f209d5a Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 14 Sep 2022 07:42:02 +0100 Subject: [PATCH 3/6] TST: Add tests for rolling --- pandas-stubs/core/window/rolling.pyi | 63 +++++++--------- tests/test_windowing.py | 109 +++++++++++++++++++++++++-- 2 files changed, 132 insertions(+), 40 deletions(-) diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index 666e04232..6e258871e 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -3,6 +3,8 @@ from typing import ( Callable, Generic, Hashable, + Literal, + TypedDict, ) import numpy as np @@ -22,6 +24,11 @@ from pandas._typing import ( WindowingRankType, ) +class _NumbaKwargs(TypedDict, total=False): + nopython: bool + nogil: bool 
+ parallel: bool + class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): exclusions: frozenset[Hashable] obj: Any = ... # Incomplete @@ -53,9 +60,7 @@ class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): def validate(self) -> None: ... def __getattr__(self, attr: str): ... def __iter__(self): ... - def aggregate( - self, func: AggFuncType, *args, **kwargs - ) -> Scalar | DataFrame | Series: ... + def aggregate(self, func: AggFuncType, *args, **kwargs) -> NDFrameT: ... agg = aggregate class BaseWindowGroupby(BaseWindow[NDFrameT]): @@ -69,9 +74,7 @@ class BaseWindowGroupby(BaseWindow[NDFrameT]): ) -> None: ... class Window(BaseWindow[NDFrameT]): - def aggregate( - self, func: AggFuncType, *args, **kwargs - ) -> Scalar | Series | DataFrame: ... + def aggregate(self, func: AggFuncType, *args, **kwargs) -> NDFrameT: ... agg = aggregate def sum(self, *args, **kwargs) -> NDFrameT: ... def mean(self, *args, **kwargs) -> NDFrameT: ... @@ -84,102 +87,92 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): self, func: Callable[..., Any], raw: bool = ..., - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: Literal["cython", "numba"] | None = ..., + engine_kwargs: _NumbaKwargs = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., - ): ... + ) -> NDFrameT: ... def sum( self, - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... def max( self, - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... def min( self, - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... def mean( self, - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... 
def median( self, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... def std( self, ddof: int = ..., - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... def var( self, ddof: int = ..., - *args, + *, engine: str | None = ..., engine_kwargs: dict[str, bool] | None = ..., - **kwargs, ) -> NDFrameT: ... - def skew(self, **kwargs) -> NDFrameT: ... - def sem(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... - def kurt(self, **kwargs) -> NDFrameT: ... + def skew(self) -> NDFrameT: ... + def sem(self, ddof: int = ...) -> NDFrameT: ... + def kurt(self) -> NDFrameT: ... def quantile( - self, quantile: float, interpolation: str = ..., **kwargs + self, + quantile: float, + interpolation: str = ..., ) -> NDFrameT: ... def rank( self, method: WindowingRankType = ..., ascending: bool = ..., pct: bool = ..., - **kwargs, ) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., - **kwargs, ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., - **kwargs, ) -> NDFrameT: ... class Rolling(RollingAndExpandingMixin[NDFrameT]): - min_periods: int - def aggregate(self, func, *args, **kwargs) -> Scalar | Series | DataFrame: ... + def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... agg = aggregate - def count(self) -> NDFrameT: ... def apply( self, func: Callable[..., Any], raw: bool = ..., - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: Literal["cython", "numba"] | None = ..., + engine_kwargs: _NumbaKwargs = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., - ) -> Scalar | Series | DataFrame: ... + ) -> NDFrameT: ... class RollingGroupby(BaseWindowGroupby[NDFrameT], Rolling): ... 
diff --git a/tests/test_windowing.py b/tests/test_windowing.py index cb62e60a1..efa4a3502 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -13,16 +13,115 @@ DF = DataFrame({"col1": S, "col2": S}) -def test_rolling(): - check(assert_type(S.rolling(10).mean(), Series), Series) - check(assert_type(DF.rolling(10).mean(), DataFrame), DataFrame) +def test_rolling_basic_math() -> None: + check(assert_type(DF.rolling(10, min_periods=10).count(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).sum(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).mean(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).median(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).var(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).std(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).min(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).max(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).corr(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).cov(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).skew(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).kurt(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10, min_periods=10).sem(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).quantile(0.5), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).rank("average"), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).rank("min"), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).rank("max"), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).cov(), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).corr(), DataFrame), DataFrame) -def test_expanding(): +def test_rolling_apply() -> None: + check(assert_type(DF.rolling(10).apply(np.mean), 
DataFrame), DataFrame) + + def _mean(df: DataFrame) -> Series: + return df.mean() + + check(assert_type(DF.rolling(10).apply(_mean), DataFrame), DataFrame) + + def _mean2(df: DataFrame) -> np.ndarray: + return np.mean(df, axis=0) + + check(assert_type(DF.rolling(10).apply(_mean2, raw=True), DataFrame), DataFrame) + + def _mean4(df: DataFrame) -> float: + return float(np.mean(df)) + + check(assert_type(DF.rolling(10).apply(_mean4, raw=True), DataFrame), DataFrame) + + +def test_rolling_aggregate() -> None: + check(assert_type(DF.rolling(10).aggregate(np.mean), DataFrame), DataFrame) + check( + assert_type(DF.rolling(10).aggregate(["mean", np.mean]), DataFrame), DataFrame + ) + check( + assert_type( + DF.rolling(10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + check(assert_type(DF.rolling(10).agg("sum"), DataFrame), DataFrame) + + +def test_rolling_basic_math_series() -> None: + check(assert_type(S.rolling(10, min_periods=10).count(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).sum(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).mean(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).median(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).var(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).std(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).min(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).max(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).corr(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).cov(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).skew(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).kurt(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).sem(), Series), Series) + check(assert_type(S.rolling(10).quantile(0.5), Series), Series) + 
check(assert_type(S.rolling(10).rank("average"), Series), Series) + check(assert_type(S.rolling(10).rank("min"), Series), Series) + check(assert_type(S.rolling(10).rank("max"), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).cov(), Series), Series) + check(assert_type(S.rolling(10, min_periods=10).corr(), Series), Series) + + +def test_rolling_apply_series() -> None: + check(assert_type(S.rolling(10).apply(np.mean), Series), Series) + + def _mean(df: Series) -> float: + return df.mean() + + check(assert_type(S.rolling(10).apply(_mean), Series), Series) + + def _mean2(df: Series) -> np.ndarray: + return np.mean(df, axis=0) + + check(assert_type(S.rolling(10).apply(_mean2, raw=True), Series), Series) + + +def test_rolling_aggregate_series() -> None: + check(assert_type(S.rolling(10).aggregate(np.mean), Series), Series) + # TODO: Make sure this works + check(assert_type(S.rolling(10).aggregate(["mean", np.mean]), DataFrame), DataFrame) + check( + assert_type( + S.rolling(10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + check(assert_type(S.rolling(10).agg("sum"), Series), Series) + + +def test_expanding() -> None: check(assert_type(S.expanding().mean(), Series), Series) check(assert_type(DF.expanding().mean(), DataFrame), DataFrame) -def test_ewm(): +def test_ewm() -> None: check(assert_type(S.ewm(span=10).mean(), Series), Series) check(assert_type(DF.ewm(span=10).mean(), DataFrame), DataFrame) From 425eace6549109bafef637de5c7e3d9f1ce8d8b6 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Wed, 14 Sep 2022 23:21:53 +0100 Subject: [PATCH 4/6] TST: Add tests and fix types --- pandas-stubs/core/window/ewm.pyi | 9 +- pandas-stubs/core/window/expanding.pyi | 9 +- pandas-stubs/core/window/rolling.pyi | 5 +- tests/test_frame.py | 30 ++--- tests/test_series.py | 13 +-- tests/test_windowing.py | 153 +++++++++++++++++++++++-- 6 files changed, 166 insertions(+), 53 deletions(-) diff --git a/pandas-stubs/core/window/ewm.pyi 
b/pandas-stubs/core/window/ewm.pyi index db980e281..fd1604fde 100644 --- a/pandas-stubs/core/window/ewm.pyi +++ b/pandas-stubs/core/window/ewm.pyi @@ -1,4 +1,7 @@ -from typing import Any +from typing import ( + Any, + Generic, +) import numpy as np from pandas import ( @@ -14,7 +17,7 @@ from pandas._typing import ( TimedeltaConvertibleTypes, ) -class ExponentialMovingWindow(BaseWindow[NDFrameT]): +class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]): com: Any = ... # Incomplete span: Any = ... # Incomplete halflife: Any = ... # Incomplete @@ -39,7 +42,7 @@ class ExponentialMovingWindow(BaseWindow[NDFrameT]): selection: Any | None = ..., ) -> None: ... def online(self, engine: str = ..., engine_kwargs: Any | None = ...): ... - def aggregate(self, func, *args, **kwargs): ... + def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... agg = aggregate def mean( self, diff --git a/pandas-stubs/core/window/expanding.pyi b/pandas-stubs/core/window/expanding.pyi index 38d36a2e3..0bc396443 100644 --- a/pandas-stubs/core/window/expanding.pyi +++ b/pandas-stubs/core/window/expanding.pyi @@ -11,6 +11,7 @@ from pandas.core.generic import NDFrame from pandas.core.window.rolling import ( BaseWindowGroupby, RollingAndExpandingMixin, + _NumbaKwargs, ) from pandas._typing import ( @@ -29,18 +30,18 @@ class Expanding(RollingAndExpandingMixin[NDFrameT]): method: str = ..., selection: Any | None = ..., # Incomplete ) -> None: ... - def aggregate(self, func, *args, **kwargs): ... + def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... agg = aggregate - def count(self): ... + def count(self) -> NDFrameT: ... def apply( self, func: Callable[..., Any], raw: bool = ..., engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine_kwargs: _NumbaKwargs | None = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., - ): ... + ) -> NDFrameT: ... 
def sum( self, *args, diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index 6e258871e..b5a28ec64 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -20,7 +20,6 @@ from pandas._typing import ( AggFuncType, Axis, NDFrameT, - Scalar, WindowingRankType, ) @@ -88,7 +87,7 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): func: Callable[..., Any], raw: bool = ..., engine: Literal["cython", "numba"] | None = ..., - engine_kwargs: _NumbaKwargs = ..., + engine_kwargs: _NumbaKwargs | None = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ) -> NDFrameT: ... @@ -170,7 +169,7 @@ class Rolling(RollingAndExpandingMixin[NDFrameT]): func: Callable[..., Any], raw: bool = ..., engine: Literal["cython", "numba"] | None = ..., - engine_kwargs: _NumbaKwargs = ..., + engine_kwargs: _NumbaKwargs | None = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ) -> NDFrameT: ... 
diff --git a/tests/test_frame.py b/tests/test_frame.py index 9ee7827e1..f9e30bab3 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -723,49 +723,35 @@ def test_types_window() -> None: df.rolling(2, axis=1, center=True) check( - assert_type(df.rolling(2).agg("max"), Union[Scalar, pd.DataFrame, pd.Series]), + assert_type(df.rolling(2).agg("max"), pd.DataFrame), pd.DataFrame, ) check( - assert_type(df.rolling(2).agg(max), Union[Scalar, pd.DataFrame, pd.Series]), + assert_type(df.rolling(2).agg(max), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg(["max", "min"]), Union[Scalar, pd.DataFrame, pd.Series] - ), + assert_type(df.rolling(2).agg(["max", "min"]), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg([max, min]), Union[Scalar, pd.DataFrame, pd.Series] - ), + assert_type(df.rolling(2).agg([max, min]), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg({"col2": "max"}), Union[Scalar, pd.DataFrame, pd.Series] - ), + assert_type(df.rolling(2).agg({"col2": "max"}), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg({"col2": max}), Union[Scalar, pd.DataFrame, pd.Series] - ), + assert_type(df.rolling(2).agg({"col2": max}), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg({"col2": ["max", "min"]}), - Union[Scalar, pd.DataFrame, pd.Series], - ), + assert_type(df.rolling(2).agg({"col2": ["max", "min"]}), pd.DataFrame), pd.DataFrame, ) check( - assert_type( - df.rolling(2).agg({"col2": [max, min]}), - Union[Scalar, pd.DataFrame, pd.Series], - ), + assert_type(df.rolling(2).agg({"col2": [max, min]}), pd.DataFrame), pd.DataFrame, ) diff --git a/tests/test_series.py b/tests/test_series.py index 6b92d251f..1e8a283eb 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -12,7 +12,6 @@ Iterator, List, Sequence, - Union, cast, ) @@ -500,23 +499,19 @@ def test_types_window() -> None: s.rolling(2, axis=0, center=True) check( - 
assert_type(s.rolling(2).agg("sum"), Union[Scalar, pd.Series, pd.DataFrame]), + assert_type(s.rolling(2).agg("sum"), pd.Series), pd.Series, ) check( - assert_type(s.rolling(2).agg(sum), Union[Scalar, pd.Series, pd.DataFrame]), + assert_type(s.rolling(2).agg(sum), pd.Series), pd.Series, ) check( - assert_type( - s.rolling(2).agg(["max", "min"]), Union[Scalar, pd.Series, pd.DataFrame] - ), + assert_type(s.rolling(2).agg(["max", "min"]), pd.Series), pd.DataFrame, ) check( - assert_type( - s.rolling(2).agg([max, min]), Union[Scalar, pd.Series, pd.DataFrame] - ), + assert_type(s.rolling(2).agg([max, min]), pd.Series), pd.DataFrame, ) diff --git a/tests/test_windowing.py b/tests/test_windowing.py index efa4a3502..a1917c1b5 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -31,8 +31,6 @@ def test_rolling_basic_math() -> None: check(assert_type(DF.rolling(10).rank("average"), DataFrame), DataFrame) check(assert_type(DF.rolling(10).rank("min"), DataFrame), DataFrame) check(assert_type(DF.rolling(10).rank("max"), DataFrame), DataFrame) - check(assert_type(DF.rolling(10).cov(), DataFrame), DataFrame) - check(assert_type(DF.rolling(10).corr(), DataFrame), DataFrame) def test_rolling_apply() -> None: @@ -86,8 +84,6 @@ def test_rolling_basic_math_series() -> None: check(assert_type(S.rolling(10).rank("average"), Series), Series) check(assert_type(S.rolling(10).rank("min"), Series), Series) check(assert_type(S.rolling(10).rank("max"), Series), Series) - check(assert_type(S.rolling(10, min_periods=10).cov(), Series), Series) - check(assert_type(S.rolling(10, min_periods=10).corr(), Series), Series) def test_rolling_apply_series() -> None: @@ -107,21 +103,154 @@ def _mean2(df: Series) -> np.ndarray: def test_rolling_aggregate_series() -> None: check(assert_type(S.rolling(10).aggregate(np.mean), Series), Series) # TODO: Make sure this works - check(assert_type(S.rolling(10).aggregate(["mean", np.mean]), DataFrame), DataFrame) + 
check(assert_type(S.rolling(10).aggregate(["mean", np.mean]), Series), DataFrame) + check( + assert_type(S.rolling(10).aggregate({"col1": "mean", "col2": np.mean}), Series), + DataFrame, + ) + check(assert_type(S.rolling(10).agg("sum"), Series), Series) + + +def test_expanding_basic_math() -> None: + check(assert_type(DF.expanding(10).count(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).sum(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).mean(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).median(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).var(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).std(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).min(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).max(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).corr(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).cov(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).skew(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).kurt(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).sem(), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).quantile(0.5), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).rank("average"), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).rank("min"), DataFrame), DataFrame) + check(assert_type(DF.expanding(10).rank("max"), DataFrame), DataFrame) + + +def test_expanding_apply() -> None: + check(assert_type(DF.expanding(10).apply(np.mean), DataFrame), DataFrame) + + def _mean(df: DataFrame) -> Series: + return df.mean() + + check(assert_type(DF.expanding(10).apply(_mean), DataFrame), DataFrame) + + def _mean2(df: DataFrame) -> np.ndarray: + return np.mean(df, axis=0) + + check(assert_type(DF.expanding(10).apply(_mean2, raw=True), DataFrame), DataFrame) + + def _mean4(df: DataFrame) -> float: + return float(np.mean(df)) + + 
check(assert_type(DF.expanding(10).apply(_mean4, raw=True), DataFrame), DataFrame) + + +def test_expanding_aggregate() -> None: + check(assert_type(DF.expanding(10).aggregate(np.mean), DataFrame), DataFrame) + check( + assert_type(DF.expanding(10).aggregate(["mean", np.mean]), DataFrame), DataFrame + ) check( assert_type( - S.rolling(10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame + DF.expanding(10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame ), DataFrame, ) - check(assert_type(S.rolling(10).agg("sum"), Series), Series) + check(assert_type(DF.expanding(10).agg("sum"), DataFrame), DataFrame) -def test_expanding() -> None: - check(assert_type(S.expanding().mean(), Series), Series) - check(assert_type(DF.expanding().mean(), DataFrame), DataFrame) +def test_expanding_basic_math_series() -> None: + check(assert_type(S.expanding(10).count(), Series), Series) + check(assert_type(S.expanding(10).sum(), Series), Series) + check(assert_type(S.expanding(10).mean(), Series), Series) + check(assert_type(S.expanding(10).median(), Series), Series) + check(assert_type(S.expanding(10).var(), Series), Series) + check(assert_type(S.expanding(10).std(), Series), Series) + check(assert_type(S.expanding(10).min(), Series), Series) + check(assert_type(S.expanding(10).max(), Series), Series) + check(assert_type(S.expanding(10).corr(), Series), Series) + check(assert_type(S.expanding(10).cov(), Series), Series) + check(assert_type(S.expanding(10).skew(), Series), Series) + check(assert_type(S.expanding(10).kurt(), Series), Series) + check(assert_type(S.expanding(10).sem(), Series), Series) + check(assert_type(S.expanding(10).quantile(0.5), Series), Series) + check(assert_type(S.expanding(10).rank("average"), Series), Series) + check(assert_type(S.expanding(10).rank("min"), Series), Series) + check(assert_type(S.expanding(10).rank("max"), Series), Series) -def test_ewm() -> None: - check(assert_type(S.ewm(span=10).mean(), Series), Series) +def 
test_expanding_apply_series() -> None: + check(assert_type(S.expanding(10).apply(np.mean), Series), Series) + + def _mean(df: Series) -> float: + return df.mean() + + check(assert_type(S.expanding(10).apply(_mean), Series), Series) + + def _mean2(df: Series) -> np.ndarray: + return np.mean(df, axis=0) + + check(assert_type(S.expanding(10).apply(_mean2, raw=True), Series), Series) + + +def test_expanding_aggregate_series() -> None: + check(assert_type(S.expanding(10).aggregate(np.mean), Series), Series) + # TODO: Make sure this works + check(assert_type(S.expanding(10).aggregate(["mean", np.mean]), Series), DataFrame) + check( + assert_type( + S.expanding(10).aggregate({"col1": "mean", "col2": np.mean}), Series + ), + DataFrame, + ) + check(assert_type(S.expanding(10).agg("sum"), Series), Series) + + +def test_ewm_basic_math() -> None: + check(assert_type(DF.ewm(span=10).sum(), DataFrame), DataFrame) check(assert_type(DF.ewm(span=10).mean(), DataFrame), DataFrame) + check(assert_type(DF.ewm(span=10).var(), DataFrame), DataFrame) + check(assert_type(DF.ewm(span=10).std(), DataFrame), DataFrame) + check(assert_type(DF.ewm(span=10).corr(), DataFrame), DataFrame) + check(assert_type(DF.ewm(span=10).cov(), DataFrame), DataFrame) + + +def test_ewm_aggregate() -> None: + check(assert_type(DF.ewm(span=10).aggregate(np.mean), DataFrame), DataFrame) + check( + assert_type(DF.ewm(span=10).aggregate(["mean", np.mean]), DataFrame), DataFrame + ) + check( + assert_type( + DF.ewm(span=10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame + ), + DataFrame, + ) + check(assert_type(DF.ewm(span=10).agg("sum"), DataFrame), DataFrame) + + +def test_ewm_basic_math_series() -> None: + check(assert_type(S.ewm(span=10).sum(), Series), Series) + check(assert_type(S.ewm(span=10).mean(), Series), Series) + check(assert_type(S.ewm(span=10).var(), Series), Series) + check(assert_type(S.ewm(span=10).std(), Series), Series) + check(assert_type(S.ewm(span=10).corr(), Series), Series) + 
check(assert_type(S.ewm(span=10).cov(), Series), Series) + + +def test_ewm_aggregate_series() -> None: + check(assert_type(S.ewm(span=10).aggregate(np.mean), Series), Series) + # TODO: Make sure this works + check(assert_type(S.ewm(span=10).aggregate(["mean", np.mean]), Series), DataFrame) + check( + assert_type( + S.ewm(span=10).aggregate({"col1": "mean", "col2": np.mean}), Series + ), + DataFrame, + ) + check(assert_type(S.ewm(span=10).agg("sum"), Series), Series) From c0a280042f5b95e5c79110dcdf1583a9de709b6e Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 15 Sep 2022 00:07:47 +0100 Subject: [PATCH 5/6] Improve typing --- pandas-stubs/core/frame.pyi | 9 +++++---- tests/test_windowing.py | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index c691b75ff..0cf4c0d35 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -68,6 +68,7 @@ from pandas._typing import ( IndexingInt, IndexLabel, IndexType, + IntervalClosedType, JsonFrameOrient, Label, Level, @@ -1776,9 +1777,9 @@ class DataFrame(NDFrame, OpsMixin): center: _bool = ..., *, win_type: _str, - on: _str | None = ..., + on: Hashable | None = ..., axis: AxisType = ..., - closed: _str | None = ..., + closed: IntervalClosedType | None = ..., ) -> Window[DataFrame]: ... @overload def rolling( @@ -1787,9 +1788,9 @@ class DataFrame(NDFrame, OpsMixin): min_periods: int | None = ..., center: _bool = ..., *, - on: _str | None = ..., + on: Hashable | None = ..., axis: AxisType = ..., - closed: _str | None = ..., + closed: IntervalClosedType | None = ..., ) -> Rolling[DataFrame]: ... 
def rpow( self, diff --git a/tests/test_windowing.py b/tests/test_windowing.py index a1917c1b5..bb07d682c 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -4,6 +4,10 @@ Series, date_range, ) +from pandas.core.window import ( + Rolling, + Window, +) from typing_extensions import assert_type from tests import check @@ -13,6 +17,11 @@ DF = DataFrame({"col1": S, "col2": S}) +def test_rolling_basic() -> None: + check(assert_type(DF.rolling(win_type="gaussian"), Window), Window) + check(assert_type(DF.rolling(10, min_periods=10), Rolling), Rolling) + + def test_rolling_basic_math() -> None: check(assert_type(DF.rolling(10, min_periods=10).count(), DataFrame), DataFrame) check(assert_type(DF.rolling(10, min_periods=10).sum(), DataFrame), DataFrame) From 76f653846ff21fc6eac8bdf2a4d4e12c0f0436b4 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Thu, 15 Sep 2022 10:03:33 +0100 Subject: [PATCH 6/6] TYP Further improvements and refactoring --- pandas-stubs/_typing.pyi | 22 ++++- pandas-stubs/core/frame.pyi | 17 ++-- pandas-stubs/core/groupby/generic.pyi | 6 +- pandas-stubs/core/series.pyi | 31 ++----- pandas-stubs/core/window/ewm.pyi | 51 ++++++---- pandas-stubs/core/window/expanding.pyi | 93 +++++++++++-------- pandas-stubs/core/window/rolling.pyi | 124 +++++++++++++++++-------- pandas-stubs/io/formats/style.pyi | 5 +- tests/test_series.py | 4 +- tests/test_windowing.py | 65 +++++++++++-- 10 files changed, 271 insertions(+), 147 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 410701bd5..9e4c0a8e4 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -97,12 +97,17 @@ FuncType = Callable[..., Any] F = TypeVar("F", bound=FuncType) HashableT = TypeVar("HashableT", bound=Hashable) -AggFuncTypeBase = Union[Callable, str] -AggFuncTypeDict = dict[Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]] -AggFuncType = Union[ +AggFuncTypeBase = Union[Callable, str, np.ufunc] +AggFuncTypeDictSeries = 
dict[Hashable, AggFuncTypeBase] +AggFuncTypeDictFrame = dict[Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]] +AggFuncTypeSeriesToFrame = Union[ + list[AggFuncTypeBase], + AggFuncTypeDictSeries, +] +AggFuncTypeFrame = Union[ AggFuncTypeBase, list[AggFuncTypeBase], - AggFuncTypeDict, + AggFuncTypeDictFrame, ] num = complex @@ -252,6 +257,15 @@ ColspaceArgType = str | int | Sequence[int | str] | Mapping[Hashable, str | int] # Windowing rank methods WindowingRankType = Literal["average", "min", "max"] +WindowingEngine = Union[Literal["cython", "numba"], None] + +class _WindowingNumbaKwargs(TypedDict, total=False): + nopython: bool + nogil: bool + parallel: bool + +WindowingEngineKwargs = Union[_WindowingNumbaKwargs, None] +QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"] class StyleExportDict(TypedDict, total=False): apply: Any diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 0cf4c0d35..773e0b433 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -46,9 +46,9 @@ import xarray as xr from pandas._typing import ( S1, - AggFuncType, AggFuncTypeBase, - AggFuncTypeDict, + AggFuncTypeDictFrame, + AggFuncTypeFrame, AnyArrayLike, ArrayLike, Axes, @@ -78,6 +78,7 @@ from pandas._typing import ( MergeHow, NaPosition, ParquetEngine, + QuantileInterpolation, ReadBuffer, Renamer, ReplaceMethod, @@ -1055,7 +1056,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def agg( self, - func: list[AggFuncTypeBase] | AggFuncTypeDict = ..., + func: list[AggFuncTypeBase] | AggFuncTypeDictFrame = ..., axis: AxisType = ..., **kwargs, ) -> DataFrame: ... @@ -1066,13 +1067,13 @@ class DataFrame(NDFrame, OpsMixin): @overload def aggregate( self, - func: list[AggFuncTypeBase] | AggFuncTypeDict, + func: list[AggFuncTypeBase] | AggFuncTypeDictFrame, axis: AxisType = ..., **kwargs, ) -> DataFrame: ... 
def transform( self, - func: AggFuncType, + func: AggFuncTypeFrame, axis: AxisType = ..., *args, **kwargs, @@ -1168,8 +1169,7 @@ class DataFrame(NDFrame, OpsMixin): q: float = ..., axis: AxisType = ..., numeric_only: _bool = ..., - interpolation: _str - | Literal["linear", "lower", "higher", "midpoint", "nearest"] = ..., + interpolation: QuantileInterpolation = ..., ) -> Series: ... @overload def quantile( @@ -1177,8 +1177,7 @@ class DataFrame(NDFrame, OpsMixin): q: list[float] | np.ndarray, axis: AxisType = ..., numeric_only: _bool = ..., - interpolation: _str - | Literal["linear", "lower", "higher", "midpoint", "nearest"] = ..., + interpolation: QuantileInterpolation = ..., ) -> DataFrame: ... def to_timestamp( self, diff --git a/pandas-stubs/core/groupby/generic.pyi b/pandas-stubs/core/groupby/generic.pyi index 8648d0fc1..e11354529 100644 --- a/pandas-stubs/core/groupby/generic.pyi +++ b/pandas-stubs/core/groupby/generic.pyi @@ -26,8 +26,8 @@ from pandas.core.series import Series from pandas._typing import ( S1, - AggFuncType, AggFuncTypeBase, + AggFuncTypeFrame, AxisType, Level, ListLike, @@ -154,8 +154,8 @@ class DataFrameGroupBy(GroupBy): def apply( # pyright: ignore[reportOverlappingOverload] self, func: Callable[[Iterable], float], *args, **kwargs ) -> DataFrame: ... - def aggregate(self, arg: AggFuncType = ..., *args, **kwargs) -> DataFrame: ... - def agg(self, arg: AggFuncType = ..., *args, **kwargs) -> DataFrame: ... + def aggregate(self, arg: AggFuncTypeFrame = ..., *args, **kwargs) -> DataFrame: ... + agg = aggregate def transform(self, func, *args, **kwargs): ... 
def filter( self, func: Callable, dropna: bool = ..., *args, **kwargs diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 3da1f10f2..04f2b0b10 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -63,7 +63,8 @@ import xarray as xr from pandas._typing import ( S1, AggFuncTypeBase, - AggFuncTypeDict, + AggFuncTypeDictFrame, + AggFuncTypeSeriesToFrame, ArrayLike, Axes, Axis, @@ -83,6 +84,7 @@ from pandas._typing import ( MaskType, MergeHow, NaPosition, + QuantileInterpolation, Renamer, ReplaceMethod, Scalar, @@ -457,15 +459,13 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): def quantile( self, q: float = ..., - interpolation: _str - | Literal["linear", "lower", "higher", "midpoint", "nearest"] = ..., + interpolation: QuantileInterpolation = ..., ) -> float: ... @overload def quantile( self, q: _ListLike, - interpolation: _str - | Literal["linear", "lower", "higher", "midpoint", "nearest"] = ..., + interpolation: QuantileInterpolation = ..., ) -> Series[S1]: ... def corr( self, @@ -632,27 +632,12 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def aggregate( self, - func: list[AggFuncTypeBase] | dict[Hashable, AggFuncTypeBase] = ..., - axis: SeriesAxisType = ..., - *args, - **kwargs, - ) -> Series[S1]: ... - @overload - def agg( - self, - func: AggFuncTypeBase, - axis: SeriesAxisType = ..., - *args, - **kwargs, - ) -> S1: ... - @overload - def agg( - self, - func: list[AggFuncTypeBase] | dict[Hashable, AggFuncTypeBase] = ..., + func: AggFuncTypeSeriesToFrame = ..., axis: SeriesAxisType = ..., *args, **kwargs, ) -> Series[S1]: ... 
+ agg = aggregate @overload def transform( self, @@ -664,7 +649,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def transform( self, - func: list[AggFuncTypeBase] | AggFuncTypeDict, + func: list[AggFuncTypeBase] | AggFuncTypeDictFrame, axis: SeriesAxisType = ..., *args, **kwargs, diff --git a/pandas-stubs/core/window/ewm.pyi b/pandas-stubs/core/window/ewm.pyi index fd1604fde..714a770dd 100644 --- a/pandas-stubs/core/window/ewm.pyi +++ b/pandas-stubs/core/window/ewm.pyi @@ -1,6 +1,7 @@ from typing import ( Any, Generic, + overload, ) import numpy as np @@ -12,9 +13,14 @@ from pandas.core.generic import NDFrame from pandas.core.window.rolling import BaseWindow from pandas._typing import ( + AggFuncTypeBase, + AggFuncTypeFrame, + AggFuncTypeSeriesToFrame, Axis, NDFrameT, TimedeltaConvertibleTypes, + WindowingEngine, + WindowingEngineKwargs, ) class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]): @@ -41,36 +47,49 @@ class ExponentialMovingWindow(BaseWindow[NDFrameT], Generic[NDFrameT]): *, selection: Any | None = ..., ) -> None: ... - def online(self, engine: str = ..., engine_kwargs: Any | None = ...): ... - def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... - agg = aggregate + @overload + def aggregate( + self: ExponentialMovingWindow[Series], + func: AggFuncTypeBase, + *args: Any, + **kwargs: Any, + ) -> Series: ... + @overload + def aggregate( + self: ExponentialMovingWindow[Series], + func: AggFuncTypeSeriesToFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... + @overload + def aggregate( + self: ExponentialMovingWindow[DataFrame], + func: AggFuncTypeFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... def mean( self, - *args, - engine: Any | None = ..., - engine_kwargs: Any | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... 
def sum( self, - *args, - engine: Any | None = ..., - engine_kwargs: Any | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... - def std(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... - def vol(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... - def var(self, bias: bool = ..., *args, **kwargs) -> NDFrameT: ... + def std(self, bias: bool = ...) -> NDFrameT: ... + def var(self, bias: bool = ...) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., bias: bool = ..., - **kwargs, ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., - **kwargs, ) -> NDFrameT: ... diff --git a/pandas-stubs/core/window/expanding.pyi b/pandas-stubs/core/window/expanding.pyi index 0bc396443..c5b654828 100644 --- a/pandas-stubs/core/window/expanding.pyi +++ b/pandas-stubs/core/window/expanding.pyi @@ -1,8 +1,10 @@ from typing import ( Any, Callable, + overload, ) +import numpy as np from pandas import ( DataFrame, Series, @@ -11,12 +13,17 @@ from pandas.core.generic import NDFrame from pandas.core.window.rolling import ( BaseWindowGroupby, RollingAndExpandingMixin, - _NumbaKwargs, ) from pandas._typing import ( + AggFuncTypeBase, + AggFuncTypeFrame, + AggFuncTypeSeriesToFrame, Axis, NDFrameT, + QuantileInterpolation, + WindowingEngine, + WindowingEngineKwargs, WindowingRankType, ) @@ -30,94 +37,102 @@ class Expanding(RollingAndExpandingMixin[NDFrameT]): method: str = ..., selection: Any | None = ..., # Incomplete ) -> None: ... - def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... - agg = aggregate + @overload + def aggregate( + self: Expanding[Series], func: AggFuncTypeBase, *args: Any, **kwargs: Any + ) -> Series: ... + @overload + def aggregate( + self: Expanding[Series], + func: AggFuncTypeSeriesToFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... 
+ @overload + def aggregate( + self: Expanding[DataFrame], + func: AggFuncTypeFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... def count(self) -> NDFrameT: ... def apply( self, func: Callable[..., Any], raw: bool = ..., - engine: str | None = ..., - engine_kwargs: _NumbaKwargs | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ) -> NDFrameT: ... def sum( self, - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def max( self, - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def min( self, - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def mean( self, - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def median( self, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def std( self, ddof: int = ..., - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def var( self, ddof: int = ..., - *args, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., - **kwargs, + *, + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... 
- def sem(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... - def skew(self, **kwargs) -> NDFrameT: ... - def kurt(self, **kwargs) -> NDFrameT: ... + def sem(self, ddof: int = ...) -> NDFrameT: ... + def skew(self) -> NDFrameT: ... + def kurt(self) -> NDFrameT: ... def quantile( - self, quantile: float, interpolation: str = ..., **kwargs + self, + quantile: float, + interpolation: QuantileInterpolation = ..., ) -> NDFrameT: ... def rank( self, method: WindowingRankType = ..., ascending: bool = ..., pct: bool = ..., - **kwargs, ) -> NDFrameT: ... def cov( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., - **kwargs, ) -> NDFrameT: ... def corr( self, other: DataFrame | Series | None = ..., pairwise: bool | None = ..., ddof: int = ..., - **kwargs, ) -> NDFrameT: ... class ExpandingGroupby(BaseWindowGroupby, Expanding): ... diff --git a/pandas-stubs/core/window/rolling.pyi b/pandas-stubs/core/window/rolling.pyi index b5a28ec64..a465cd2c6 100644 --- a/pandas-stubs/core/window/rolling.pyi +++ b/pandas-stubs/core/window/rolling.pyi @@ -3,8 +3,7 @@ from typing import ( Callable, Generic, Hashable, - Literal, - TypedDict, + overload, ) import numpy as np @@ -17,17 +16,17 @@ from pandas.core.groupby.ops import BaseGrouper from pandas.core.indexes.api import Index from pandas._typing import ( - AggFuncType, + AggFuncTypeBase, + AggFuncTypeFrame, + AggFuncTypeSeriesToFrame, Axis, NDFrameT, + QuantileInterpolation, + WindowingEngine, + WindowingEngineKwargs, WindowingRankType, ) -class _NumbaKwargs(TypedDict, total=False): - nopython: bool - nogil: bool - parallel: bool - class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): exclusions: frozenset[Hashable] obj: Any = ... # Incomplete @@ -59,26 +58,59 @@ class BaseWindow(SelectionMixin[NDFrameT], Generic[NDFrameT]): def validate(self) -> None: ... def __getattr__(self, attr: str): ... def __iter__(self): ... 
- def aggregate(self, func: AggFuncType, *args, **kwargs) -> NDFrameT: ... + @overload + def aggregate( + self: BaseWindow[Series], func: AggFuncTypeBase, *args: Any, **kwargs: Any + ) -> Series: ... + @overload + def aggregate( + self: BaseWindow[Series], + func: AggFuncTypeSeriesToFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... + @overload + def aggregate( + self: BaseWindow[DataFrame], + func: AggFuncTypeFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... agg = aggregate class BaseWindowGroupby(BaseWindow[NDFrameT]): def __init__( self, obj: NDFrameT, - *args, + *args: Any, _grouper: BaseGrouper, _as_index: bool = ..., - **kwargs, + **kwargs: Any, ) -> None: ... class Window(BaseWindow[NDFrameT]): - def aggregate(self, func: AggFuncType, *args, **kwargs) -> NDFrameT: ... - agg = aggregate - def sum(self, *args, **kwargs) -> NDFrameT: ... - def mean(self, *args, **kwargs) -> NDFrameT: ... - def var(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... - def std(self, ddof: int = ..., *args, **kwargs) -> NDFrameT: ... + @overload + def aggregate( + self: Window[Series], func: AggFuncTypeBase, *args: Any, **kwargs: Any + ) -> Series: ... + @overload + def aggregate( + self: Window[Series], + func: AggFuncTypeSeriesToFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... + @overload + def aggregate( + self: Window[DataFrame], + func: AggFuncTypeFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... + def sum(self, **kwargs: Any) -> NDFrameT: ... + def mean(self, **kwargs: Any) -> NDFrameT: ... + def var(self, ddof: int = ..., **kwargs: Any) -> NDFrameT: ... + def std(self, ddof: int = ..., **kwargs: Any) -> NDFrameT: ... class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): def count(self) -> NDFrameT: ... 
@@ -86,53 +118,53 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): self, func: Callable[..., Any], raw: bool = ..., - engine: Literal["cython", "numba"] | None = ..., - engine_kwargs: _NumbaKwargs | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ) -> NDFrameT: ... def sum( self, *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def max( self, *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def min( self, *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def mean( self, *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def median( self, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def std( self, ddof: int = ..., *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def var( self, ddof: int = ..., *, - engine: str | None = ..., - engine_kwargs: dict[str, bool] | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs = ..., ) -> NDFrameT: ... def skew(self) -> NDFrameT: ... def sem(self, ddof: int = ...) -> NDFrameT: ... 
@@ -140,7 +172,7 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): def quantile( self, quantile: float, - interpolation: str = ..., + interpolation: QuantileInterpolation = ..., ) -> NDFrameT: ... def rank( self, @@ -162,14 +194,30 @@ class RollingAndExpandingMixin(BaseWindow[NDFrameT], Generic[NDFrameT]): ) -> NDFrameT: ... class Rolling(RollingAndExpandingMixin[NDFrameT]): - def aggregate(self, func, *args, **kwargs) -> NDFrameT: ... - agg = aggregate + @overload + def aggregate( + self: Rolling[Series], func: AggFuncTypeBase, *args: Any, **kwargs: Any + ) -> Series: ... + @overload + def aggregate( + self: Rolling[Series], + func: AggFuncTypeSeriesToFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... + @overload + def aggregate( + self: Rolling[DataFrame], + func: AggFuncTypeFrame, + *args: Any, + **kwargs: Any, + ) -> DataFrame: ... def apply( self, func: Callable[..., Any], raw: bool = ..., - engine: Literal["cython", "numba"] | None = ..., - engine_kwargs: _NumbaKwargs | None = ..., + engine: WindowingEngine = ..., + engine_kwargs: WindowingEngineKwargs | None = ..., args: tuple[Any, ...] | None = ..., kwargs: dict[str, Any] | None = ..., ) -> NDFrameT: ... diff --git a/pandas-stubs/io/formats/style.pyi b/pandas-stubs/io/formats/style.pyi index df8f9d4f6..f2bad89ce 100644 --- a/pandas-stubs/io/formats/style.pyi +++ b/pandas-stubs/io/formats/style.pyi @@ -18,6 +18,7 @@ from pandas._typing import ( IndexLabel, IntervalClosedType, Level, + QuantileInterpolation, Scalar, T, WriteBuffer, @@ -312,9 +313,7 @@ class Styler(StylerRenderer[Styler]): axis: AxisType | None = ..., q_left: float = ..., q_right: float = ..., - interpolation: Literal[ - "linear", "lower", "higher", "midpoint", "nearest" - ] = ..., + interpolation: QuantileInterpolation = ..., inclusive: IntervalClosedType = ..., props: str | None = ..., ) -> Styler: ... 
diff --git a/tests/test_series.py b/tests/test_series.py index 1e8a283eb..9cdc98536 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -507,11 +507,11 @@ def test_types_window() -> None: pd.Series, ) check( - assert_type(s.rolling(2).agg(["max", "min"]), pd.Series), + assert_type(s.rolling(2).agg(["max", "min"]), pd.DataFrame), pd.DataFrame, ) check( - assert_type(s.rolling(2).agg([max, min]), pd.Series), + assert_type(s.rolling(2).agg([max, min]), pd.DataFrame), pd.DataFrame, ) diff --git a/tests/test_windowing.py b/tests/test_windowing.py index bb07d682c..ef0407fc2 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -18,8 +18,8 @@ def test_rolling_basic() -> None: - check(assert_type(DF.rolling(win_type="gaussian"), Window), Window) - check(assert_type(DF.rolling(10, min_periods=10), Rolling), Rolling) + check(assert_type(DF.rolling(10, win_type="gaussian"), "Window[DataFrame]"), Window) + check(assert_type(DF.rolling(10, min_periods=10), "Rolling[DataFrame]"), Rolling) def test_rolling_basic_math() -> None: @@ -74,6 +74,35 @@ def test_rolling_aggregate() -> None: ) check(assert_type(DF.rolling(10).agg("sum"), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).aggregate(np.mean), DataFrame), DataFrame) + check(assert_type(DF.rolling(10).aggregate("mean"), DataFrame), DataFrame) + + def _mean(df: DataFrame) -> Series: + return df.mean() + + check(assert_type(DF.rolling(10).aggregate(_mean), DataFrame), DataFrame) + + check(assert_type(DF.rolling(10).aggregate([np.mean]), DataFrame), DataFrame) + check( + assert_type(DF.rolling(10).aggregate([np.mean, "mean"]), DataFrame), DataFrame + ) + check( + assert_type( + DF.rolling(10).aggregate({"col1": np.mean, "col2": "mean"}), DataFrame + ), + DataFrame, + ) + check( + assert_type( + DF.rolling(10).aggregate({"col1": [np.mean, "mean"], "col2": "mean"}), + DataFrame, + ), + DataFrame, + ) + + # func: np.ufunc | Callable | str | list[Callable | str, np.ufunc] | dict[Hashable, Callable 
| str | np.ufunc| list[Callable | str]] + check(assert_type(DF.rolling(10).agg("sum"), DataFrame), DataFrame) + def test_rolling_basic_math_series() -> None: check(assert_type(S.rolling(10, min_periods=10).count(), Series), Series) @@ -111,12 +140,25 @@ def _mean2(df: Series) -> np.ndarray: def test_rolling_aggregate_series() -> None: check(assert_type(S.rolling(10).aggregate(np.mean), Series), Series) - # TODO: Make sure this works - check(assert_type(S.rolling(10).aggregate(["mean", np.mean]), Series), DataFrame) + check(assert_type(S.rolling(10).aggregate("mean"), Series), Series) + + def _mean(s: Series) -> float: + return s.mean() + + check(assert_type(S.rolling(10).aggregate(_mean), Series), Series) + + check(assert_type(S.rolling(10).aggregate([np.mean]), DataFrame), DataFrame) + check(assert_type(S.rolling(10).aggregate([np.mean, "mean"]), DataFrame), DataFrame) check( - assert_type(S.rolling(10).aggregate({"col1": "mean", "col2": np.mean}), Series), + assert_type( + S.rolling(10).aggregate({"col1": np.mean, "col2": "mean", "col3": _mean}), + DataFrame, + ), DataFrame, ) + # check(assert_type(S.rolling(10).aggregate({"col1": [np.mean, "mean"], "col2": "mean"}), Series), DataFrame) + + # func: np.ufunc | Callable | str | list[Callable | str, np.ufunc] | dict[Hashable, Callable | str | np.ufunc| list[Callable | str]] check(assert_type(S.rolling(10).agg("sum"), Series), Series) @@ -210,10 +252,12 @@ def _mean2(df: Series) -> np.ndarray: def test_expanding_aggregate_series() -> None: check(assert_type(S.expanding(10).aggregate(np.mean), Series), Series) # TODO: Make sure this works - check(assert_type(S.expanding(10).aggregate(["mean", np.mean]), Series), DataFrame) + check( + assert_type(S.expanding(10).aggregate(["mean", np.mean]), DataFrame), DataFrame + ) check( assert_type( - S.expanding(10).aggregate({"col1": "mean", "col2": np.mean}), Series + S.expanding(10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame ), DataFrame, ) @@ -254,11 +298,12 @@ 
def test_ewm_basic_math_series() -> None: def test_ewm_aggregate_series() -> None: check(assert_type(S.ewm(span=10).aggregate(np.mean), Series), Series) - # TODO: Make sure this works - check(assert_type(S.ewm(span=10).aggregate(["mean", np.mean]), Series), DataFrame) + check( + assert_type(S.ewm(span=10).aggregate(["mean", np.mean]), DataFrame), DataFrame + ) check( assert_type( - S.ewm(span=10).aggregate({"col1": "mean", "col2": np.mean}), Series + S.ewm(span=10).aggregate({"col1": "mean", "col2": np.mean}), DataFrame ), DataFrame, )