diff --git a/pandas/_typing.py b/pandas/_typing.py index dc51c04447bef..c7d82be4e24a1 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -106,7 +106,8 @@ NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index") -Axis = Union[str, int] +AxisInt = int +Axis = Union[AxisInt, Literal["index", "columns", "rows"]] IndexLabel = Union[Hashable, Sequence[Hashable]] Level = Hashable Shape = Tuple[int, ...] diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 140d41782e6d3..6dc4a66f34710 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -29,7 +29,10 @@ is_bool, is_integer, ) -from pandas._typing import Axis +from pandas._typing import ( + Axis, + AxisInt, +) from pandas.errors import UnsupportedFunctionCall from pandas.util._validators import ( validate_args, @@ -413,7 +416,7 @@ def validate_resampler_func(method: str, args, kwargs) -> None: raise TypeError("too many arguments passed in") -def validate_minmax_axis(axis: int | None, ndim: int = 1) -> None: +def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None: """ Ensure that the axis argument passed to min, max, argmin, or argmax is zero or None, as otherwise it will be incorrectly ignored. diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6a04cbf4b5846..a43b82380fe20 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -29,6 +29,7 @@ from pandas._typing import ( AnyArrayLike, ArrayLike, + AxisInt, DtypeObj, IndexLabel, TakeIndexer, @@ -1105,7 +1106,7 @@ def mode( def rank( values: ArrayLike, - axis: int = 0, + axis: AxisInt = 0, method: str = "average", na_option: str = "keep", ascending: bool = True, @@ -1483,7 +1484,7 @@ def get_indexer(current_indexer, other_indexer): def take( arr, indices: TakeIndexer, - axis: int = 0, + axis: AxisInt = 0, allow_fill: bool = False, fill_value=None, ): @@ -1675,7 +1676,7 @@ def searchsorted( _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0): +def diff(arr, n: int, axis: AxisInt = 0): """ difference of n between self, analogous to s-s.shift(n) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 48822d9d01ddb..6ac8857582153 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -31,6 +31,7 @@ AggFuncTypeDict, AggObjType, Axis, + AxisInt, NDFrameT, npt, ) @@ -104,7 +105,7 @@ def frame_apply( class Apply(metaclass=abc.ABCMeta): - axis: int + axis: AxisInt def __init__( self, diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 979d3ddac63c2..4f8076af6206e 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -9,7 +9,10 @@ import numpy as np from pandas._libs import missing as libmissing -from pandas._typing import npt +from pandas._typing import ( + AxisInt, + npt, +) from pandas.core.nanops import check_below_min_count @@ -21,7 +24,7 @@ def _reductions( *, skipna: bool = True, min_count: int = 0, - axis: int | None = None, + axis: AxisInt | None = None, **kwargs, ): """ @@ -62,7 +65,7 @@ def sum( *, skipna: bool = True, min_count: int = 0, - axis: int | None = None, + axis: AxisInt | None = None, ): return _reductions( np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis @@ -75,7 +78,7 @@ def prod( *, skipna: bool = True, min_count: int = 0, - axis: int | None = None, + axis: AxisInt | None = None, ): return _reductions( np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis @@ -88,7 +91,7 @@ def _minmax( mask: npt.NDArray[np.bool_], *, skipna: bool = True, - axis: int | None = None, + axis: AxisInt | None = None, ): """ Reduction for 1D masked array. @@ -125,7 +128,7 @@ def min( mask: npt.NDArray[np.bool_], *, skipna: bool = True, - axis: int | None = None, + axis: AxisInt | None = None, ): return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis) @@ -135,7 +138,7 @@ def max( mask: npt.NDArray[np.bool_], *, skipna: bool = True, - axis: int | None = None, + axis: AxisInt | None = None, ): return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis) @@ -145,7 +148,7 @@ def mean( mask: npt.NDArray[np.bool_], *, skipna: bool = True, - axis: int | None = None, + axis: AxisInt | None = None, ): if not values.size or mask.all(): return libmissing.NA @@ -157,7 +160,7 @@ def var( mask: npt.NDArray[np.bool_], *, skipna: bool = True, - axis: int | None = None, + axis: AxisInt | None = None, ddof: int = 1, ): if not values.size or mask.all(): diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 63f8ccde8a883..19c19c66a7256 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -15,6 +15,7 @@ ) from pandas._typing import ( ArrayLike, + AxisInt, npt, ) @@ -36,7 +37,7 @@ def take_nd( arr: np.ndarray, indexer, - axis: int = ..., + axis: AxisInt = ..., fill_value=..., allow_fill: bool = ..., ) -> np.ndarray: @@ -47,7 +48,7 @@ def take_nd( def take_nd( arr: ExtensionArray, indexer, - axis: int = ..., + axis: AxisInt = ..., fill_value=..., allow_fill: bool = ..., ) -> ArrayLike: @@ -57,7 +58,7 @@ def take_nd( def take_nd( arr: ArrayLike, indexer, - axis: int = 0, + axis: AxisInt = 0, fill_value=lib.no_default, allow_fill: bool = True, ) -> ArrayLike: @@ -120,7 +121,7 @@ def take_nd( def _take_nd_ndarray( arr: np.ndarray, indexer: npt.NDArray[np.intp] | None, - axis: int, + axis: AxisInt, fill_value, allow_fill: bool, ) -> np.ndarray: @@ -287,7 +288,7 @@ def take_2d_multi( @functools.lru_cache(maxsize=128) def _get_take_nd_function_cached( - ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: AxisInt ): """ Part of _get_take_nd_function below that doesn't need `mask_info` and thus @@ -324,7 +325,11 @@ def _get_take_nd_function_cached( def _get_take_nd_function( - ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None + ndim: int, + arr_dtype: np.dtype, + out_dtype: np.dtype, + axis: AxisInt = 0, + mask_info=None, ): """ Get the appropriate "take" implementation for the given dimension, axis @@ -503,7 +508,7 @@ def _take_nd_object( arr: np.ndarray, indexer: npt.NDArray[np.intp], out: np.ndarray, - axis: int, + axis: AxisInt, fill_value, mask_info, ) -> None: diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py index 93b029c21760e..56648189f1759 100644 --- a/pandas/core/array_algos/transforms.py +++ b/pandas/core/array_algos/transforms.py @@ -6,8 +6,10 @@ import numpy as np +from pandas._typing import AxisInt -def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray: + +def shift(values: np.ndarray, periods: int, axis: AxisInt, fill_value) -> np.ndarray: new_values = values if periods == 0 or values.size == 0: diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index f17d343024915..6860ba291bf73 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -17,6 +17,7 @@ from pandas._libs.arrays import NDArrayBacked from pandas._typing import ( ArrayLike, + AxisInt, Dtype, F, PositionalIndexer2D, @@ -157,7 +158,7 @@ def take( *, allow_fill: bool = False, fill_value: Any = None, - axis: int = 0, + axis: AxisInt = 0, ) -> NDArrayBackedExtensionArrayT: if allow_fill: fill_value = self._validate_scalar(fill_value) @@ -192,7 +193,7 @@ def _values_for_factorize(self): return self._ndarray, self._internal_fill_value # Signature of "argmin" incompatible with supertype "ExtensionArray" - def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override] + def argmin(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override] # override base class by adding axis keyword validate_bool_kwarg(skipna, "skipna") if not skipna and self._hasna: @@ -200,7 +201,7 @@ def argmin(self, axis: int = 0, skipna: bool = True): # type: ignore[override] return nargminmax(self, "argmin", axis=axis) # Signature of "argmax" incompatible with supertype "ExtensionArray" - def argmax(self, axis: int = 0, skipna: bool = True): # type: ignore[override] + def argmax(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override] # override base class by adding axis keyword validate_bool_kwarg(skipna, "skipna") if not skipna and self._hasna: @@ -216,7 +217,7 @@ def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT: def _concat_same_type( cls: type[NDArrayBackedExtensionArrayT], to_concat: Sequence[NDArrayBackedExtensionArrayT], - axis: int = 0, + axis: AxisInt = 0, ) -> NDArrayBackedExtensionArrayT: dtypes = {str(x.dtype) for x in to_concat} if len(dtypes) != 1: @@ -351,7 +352,7 @@ def fillna( # ------------------------------------------------------------------------ # Reductions - def _wrap_reduction_result(self, axis: int | None, result): + def _wrap_reduction_result(self, axis: AxisInt | None, result): if axis is None or self.ndim == 1: return self._box_func(result) return self._from_backing_data(result) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 85e85ee6bf070..efce69439ea43 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -30,6 +30,7 @@ from pandas._typing import ( ArrayLike, AstypeArg, + AxisInt, Dtype, FillnaOptions, PositionalIndexer, @@ -1137,7 +1138,7 @@ def factorize( @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) def repeat( - self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None + self: ExtensionArrayT, repeats: int | Sequence[int], axis: AxisInt | None = None ) -> ExtensionArrayT: nv.validate_repeat((), {"axis": axis}) ind = np.arange(len(self)).repeat(repeats) @@ -1567,7 +1568,7 @@ def _fill_mask_inplace( def _rank( self, *, - axis: int = 0, + axis: AxisInt = 0, method: str = "average", na_option: str = "keep", ascending: bool = True, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2acf1ac71970f..f2dace75906ad 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -39,6 +39,7 @@ from pandas._typing import ( ArrayLike, AstypeArg, + AxisInt, Dtype, NpDtype, Ordered, @@ -1988,7 +1989,7 @@ def sort_values( def _rank( self, *, - axis: int = 0, + axis: AxisInt = 0, method: str = "average", na_option: str = "keep", ascending: bool = True, @@ -2464,7 +2465,7 @@ def equals(self, other: object) -> bool: @classmethod def _concat_same_type( - cls: type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0 + cls: type[CategoricalT], to_concat: Sequence[CategoricalT], axis: AxisInt = 0 ) -> CategoricalT: from pandas.core.dtypes.concat import union_categoricals diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4f92afd048c2e..611006ebeff94 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -52,6 +52,7 @@ from pandas._libs.tslibs.timestamps import integer_op_not_supported from pandas._typing import ( ArrayLike, + AxisInt, DatetimeLikeScalar, Dtype, DtypeObj, @@ -538,7 +539,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: def _concat_same_type( cls: type[DatetimeLikeArrayT], to_concat: Sequence[DatetimeLikeArrayT], - axis: int = 0, + axis: AxisInt = 0, ) -> DatetimeLikeArrayT: new_obj = super()._concat_same_type(to_concat, axis) @@ -1609,7 +1610,7 @@ def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: # -------------------------------------------------------------- # Reductions - def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs): """ Return the minimum value of the Array or minimum along an axis. @@ -1638,7 +1639,7 @@ def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs): result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs): """ Return the maximum value of the Array or maximum along an axis. @@ -1667,7 +1668,7 @@ def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs): result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def mean(self, *, skipna: bool = True, axis: int | None = 0): + def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): """ Return the mean value of the Array. @@ -1706,7 +1707,7 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0): ) return self._wrap_reduction_result(axis, result) - def median(self, *, axis: int | None = None, skipna: bool = True, **kwargs): + def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs): nv.validate_median((), kwargs) if axis is not None and abs(axis) >= self.ndim: @@ -2083,11 +2084,11 @@ def ceil(self, freq, ambiguous="raise", nonexistent="raise"): # -------------------------------------------------------------- # Reductions - def any(self, *, axis: int | None = None, skipna: bool = True) -> bool: + def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool: # GH#34479 discussion of desired behavior long-term return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) - def all(self, *, axis: int | None = None, skipna: bool = True) -> bool: + def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool: # GH#34479 discussion of desired behavior long-term return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 12a53ae0a39dd..57ef236eb6d5f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -31,6 +31,7 @@ from pandas._libs.missing import NA from pandas._typing import ( ArrayLike, + AxisInt, Dtype, IntervalClosedType, NpDtype, @@ -813,7 +814,7 @@ def argsort( ascending=ascending, kind=kind, na_position=na_position, **kwargs ) - def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: + def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA: nv.validate_minmax_axis(axis, self.ndim) if not len(self): @@ -830,7 +831,7 @@ def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: indexer = obj.argsort()[0] return obj[indexer] - def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: + def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA: nv.validate_minmax_axis(axis, self.ndim) if not len(self): @@ -1571,7 +1572,7 @@ def delete(self: IntervalArrayT, loc) -> IntervalArrayT: def repeat( self: IntervalArrayT, repeats: int | Sequence[int], - axis: int | None = None, + axis: AxisInt | None = None, ) -> IntervalArrayT: nv.validate_repeat((), {"axis": axis}) left_repeat = self.left.repeat(repeats) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index a944dc88a2e77..b04a26120cabb 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -20,6 +20,7 @@ from pandas._typing import ( ArrayLike, AstypeArg, + AxisInt, DtypeObj, NpDtype, PositionalIndexer, @@ -267,7 +268,7 @@ def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT: mask = self._mask.swapaxes(axis1, axis2) return type(self)(data, mask) - def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT: + def delete(self: BaseMaskedArrayT, loc, axis: AxisInt = 0) -> BaseMaskedArrayT: data = np.delete(self._data, loc, axis=axis) mask = np.delete(self._mask, loc, axis=axis) return type(self)(data, mask) @@ -783,7 +784,7 @@ def nbytes(self) -> int: def _concat_same_type( cls: type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT], - axis: int = 0, + axis: AxisInt = 0, ) -> BaseMaskedArrayT: data = np.concatenate([x._data for x in to_concat], axis=axis) mask = np.concatenate([x._mask for x in to_concat], axis=axis) @@ -795,7 +796,7 @@ def take( *, allow_fill: bool = False, fill_value: Scalar | None = None, - axis: int = 0, + axis: AxisInt = 0, ) -> BaseMaskedArrayT: # we always fill with 1 internally # to avoid upcasting @@ -1060,7 +1061,9 @@ def _wrap_reduction_result(self, name: str, result, skipna, **kwargs): return self._maybe_mask_result(result, mask) return result - def sum(self, *, skipna: bool = True, min_count=0, axis: int | None = 0, **kwargs): + def sum( + self, *, skipna: bool = True, min_count=0, axis: AxisInt | None = 0, **kwargs + ): nv.validate_sum((), kwargs) # TODO: do this in validate_sum? @@ -1081,7 +1084,9 @@ def sum(self, *, skipna: bool = True, min_count=0, axis: int | None = 0, **kwarg "sum", result, skipna=skipna, axis=axis, **kwargs ) - def prod(self, *, skipna: bool = True, min_count=0, axis: int | None = 0, **kwargs): + def prod( + self, *, skipna: bool = True, min_count=0, axis: AxisInt | None = 0, **kwargs + ): nv.validate_prod((), kwargs) result = masked_reductions.prod( self._data, @@ -1094,7 +1099,7 @@ def prod(self, *, skipna: bool = True, min_count=0, axis: int | None = 0, **kwar "prod", result, skipna=skipna, axis=axis, **kwargs ) - def mean(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): + def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_mean((), kwargs) result = masked_reductions.mean( self._data, @@ -1107,7 +1112,7 @@ def mean(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): ) def var( - self, *, skipna: bool = True, axis: int | None = 0, ddof: int = 1, **kwargs + self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs ): nv.validate_stat_ddof_func((), kwargs, fname="var") result = masked_reductions.var( @@ -1121,7 +1126,7 @@ def var( "var", result, skipna=skipna, axis=axis, **kwargs ) - def min(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): + def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_min((), kwargs) return masked_reductions.min( self._data, @@ -1130,7 +1135,7 @@ def min(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): axis=axis, ) - def max(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): + def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): nv.validate_max((), kwargs) return masked_reductions.max( self._data, diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 36c67d2fe1225..290860d897ea2 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -4,6 +4,7 @@ from pandas._libs import lib from pandas._typing import ( + AxisInt, Dtype, NpDtype, Scalar, @@ -202,7 +203,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, float | None]: def any( self, *, - axis: int | None = None, + axis: AxisInt | None = None, out=None, keepdims: bool = False, skipna: bool = True, @@ -214,7 +215,7 @@ def any( def all( self, *, - axis: int | None = None, + axis: AxisInt | None = None, out=None, keepdims: bool = False, skipna: bool = True, @@ -223,14 +224,18 @@ def all( result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def min(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + def min( + self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_min((), kwargs) result = nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scalar: + def max( + self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_max((), kwargs) result = nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna @@ -238,7 +243,7 @@ def max(self, *, axis: int | None = None, skipna: bool = True, **kwargs) -> Scal return self._wrap_reduction_result(axis, result) def sum( - self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + self, *, axis: AxisInt | None = None, skipna: bool = True, min_count=0, **kwargs ) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( @@ -247,7 +252,7 @@ def sum( return self._wrap_reduction_result(axis, result) def prod( - self, *, axis: int | None = None, skipna: bool = True, min_count=0, **kwargs + self, *, axis: AxisInt | None = None, skipna: bool = True, min_count=0, **kwargs ) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( @@ -258,7 +263,7 @@ def prod( def mean( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, keepdims: bool = False, @@ -271,7 +276,7 @@ def mean( def median( self, *, - axis: int | None = None, + axis: AxisInt | None = None, out=None, overwrite_input: bool = False, keepdims: bool = False, @@ -286,7 +291,7 @@ def median( def std( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, ddof=1, @@ -302,7 +307,7 @@ def std( def var( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, ddof=1, @@ -318,7 +323,7 @@ def var( def sem( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, ddof=1, @@ -334,7 +339,7 @@ def sem( def kurt( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, keepdims: bool = False, @@ -349,7 +354,7 @@ def kurt( def skew( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, keepdims: bool = False, diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index a63b5ff152a6c..c6d2cf9e25fa9 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -32,6 +32,7 @@ from pandas._typing import ( ArrayLike, AstypeArg, + AxisInt, Dtype, NpDtype, PositionalIndexer, @@ -1500,7 +1501,12 @@ def any(self, axis=0, *args, **kwargs): return values.any().item() def sum( - self, axis: int = 0, min_count: int = 0, skipna: bool = True, *args, **kwargs + self, + axis: AxisInt = 0, + min_count: int = 0, + skipna: bool = True, + *args, + **kwargs, ) -> Scalar: """ Sum of non-NA/null values @@ -1538,7 +1544,7 @@ def sum( return na_value_for_dtype(self.dtype.subtype, compat=False) return sp_sum + self.fill_value * nsparse - def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray: + def cumsum(self, axis: AxisInt = 0, *args, **kwargs) -> SparseArray: """ Cumulative sum of non-NA/null values. @@ -1589,7 +1595,7 @@ def mean(self, axis=0, *args, **kwargs): nsparse = self.sp_index.ngaps return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) - def max(self, *, axis: int | None = None, skipna: bool = True): + def max(self, *, axis: AxisInt | None = None, skipna: bool = True): """ Max of array values, ignoring NA values if specified. @@ -1607,7 +1613,7 @@ def max(self, *, axis: int | None = None, skipna: bool = True): nv.validate_minmax_axis(axis, self.ndim) return self._min_max("max", skipna=skipna) - def min(self, *, axis: int | None = None, skipna: bool = True): + def min(self, *, axis: AxisInt | None = None, skipna: bool = True): """ Min of array values, ignoring NA values if specified. diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 959b2bed5d7f5..2f9857eb43860 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -12,6 +12,7 @@ ) from pandas._libs.arrays import NDArrayBacked from pandas._typing import ( + AxisInt, Dtype, Scalar, npt, @@ -452,7 +453,7 @@ def astype(self, dtype, copy: bool = True): return super().astype(dtype, copy) def _reduce( - self, name: str, *, skipna: bool = True, axis: int | None = 0, **kwargs + self, name: str, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs ): if name in ["min", "max"]: return getattr(self, name)(skipna=skipna, axis=axis) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 12e3e9813a816..06ea661aca6c9 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -32,6 +32,7 @@ parse_timedelta_unit, ) from pandas._typing import ( + AxisInt, DtypeObj, NpDtype, npt, @@ -327,7 +328,7 @@ def __iter__(self) -> Iterator: def sum( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, keepdims: bool = False, @@ -347,7 +348,7 @@ def sum( def std( self, *, - axis: int | None = None, + axis: AxisInt | None = None, dtype: NpDtype | None = None, out=None, ddof: int = 1, diff --git a/pandas/core/base.py b/pandas/core/base.py index 8e19c75640c1d..7f8db4182c71d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -25,6 +25,7 @@ import pandas._libs.lib as lib from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, IndexLabel, NDFrameT, @@ -545,7 +546,7 @@ def to_numpy( def empty(self) -> bool: return not self.size - def max(self, axis=None, skipna: bool = True, *args, **kwargs): + def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs): """ Return the maximum value of the Index. @@ -590,7 +591,9 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs): return nanops.nanmax(self._values, skipna=skipna) @doc(op="max", oppose="min", value="largest") - def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + def argmax( + self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs + ) -> int: """ Return int position of the {value} value in the Series. @@ -657,7 +660,7 @@ def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: delegate, skipna=skipna ) - def min(self, axis=None, skipna: bool = True, *args, **kwargs): + def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs): """ Return the minimum value of the Index. @@ -702,7 +705,9 @@ def min(self, axis=None, skipna: bool = True, *args, **kwargs): return nanops.nanmin(self._values, skipna=skipna) @doc(argmax, op="min", oppose="max", value="smallest") - def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: + def argmin( + self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs + ) -> int: delegate = self._values nv.validate_minmax_axis(axis) skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 28e1498c5906c..8ea78ff68a291 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -14,6 +14,7 @@ from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, ) from pandas.util._exceptions import find_stack_level @@ -69,7 +70,7 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: return astype_array(arr, dtype=dtype, copy=False) -def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False): +def concat_compat(to_concat, axis: AxisInt = 0, ea_compat_axis: bool = False): """ provide concatenation of an array of arrays each of which is a single 'normalized' dtypes (in that for example, if it's object, then it is a @@ -329,7 +330,7 @@ def _maybe_unwrap(x): return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) -def _concatenate_2d(to_concat, axis: int): +def _concatenate_2d(to_concat, axis: AxisInt): # coerce to 2d if needed & concatenate if axis == 1: to_concat = [np.atleast_2d(x) for x in to_concat] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 79e8b2ed806c8..edca6918368fa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -55,6 +55,7 @@ ArrayLike, Axes, Axis, + AxisInt, ColspaceArgType, CompressionOptions, Dtype, @@ -3712,7 +3713,7 @@ def T(self) -> DataFrame: # ---------------------------------------------------------------------- # Indexing Methods - def _ixs(self, i: int, axis: int = 0) -> Series: + def _ixs(self, i: int, axis: AxisInt = 0) -> Series: """ Parameters ---------- @@ -7604,7 +7605,7 @@ def _arith_method(self, other, op): _logical_method = _arith_method - def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): + def _dispatch_frame_op(self, right, func: Callable, axis: AxisInt | None = None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -10769,7 +10770,7 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False) return result.astype("int64").__finalize__(self, method="count") - def _count_level(self, level: Level, axis: int = 0, numeric_only: bool = False): + def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False): if numeric_only: frame = self._get_numeric_data() else: @@ -11654,7 +11655,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: # ---------------------------------------------------------------------- # Add index and columns - _AXIS_ORDERS = ["index", "columns"] + _AXIS_ORDERS: list[Literal["index", "columns"]] = ["index", "columns"] _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = { **NDFrame._AXIS_TO_AXIS_NUMBER, 1: 1, @@ -11662,7 +11663,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: } _AXIS_LEN = len(_AXIS_ORDERS) _info_axis_number = 1 - _info_axis_name = "columns" + _info_axis_name: Literal["columns"] = "columns" index = properties.AxisProperty( axis=1, doc="The index (row labels) of the DataFrame." diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7e3d5034ef68d..f023e5b6adf04 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -44,6 +44,7 @@ AnyArrayLike, ArrayLike, Axis, + AxisInt, ColspaceArgType, CompressionOptions, Dtype, @@ -485,10 +486,10 @@ def _data(self): # Axis _stat_axis_number = 0 _stat_axis_name = "index" - _AXIS_ORDERS: list[str] - _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} + _AXIS_ORDERS: list[Literal["index", "columns"]] + _AXIS_TO_AXIS_NUMBER: dict[Axis, AxisInt] = {0: 0, "index": 0, "rows": 0} _info_axis_number: int - _info_axis_name: str + _info_axis_name: Literal["index", "columns"] _AXIS_LEN: int @property @@ -511,10 +512,12 @@ def _AXIS_NAMES(self) -> dict[int, str]: return {0: "index"} @final - def _construct_axes_dict(self, axes=None, **kwargs): + def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs): """Return an axes dictionary for myself.""" d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} - d.update(kwargs) + # error: Argument 1 to "update" of "MutableMapping" has incompatible type + # "Dict[str, Any]"; expected "SupportsKeysAndGetItem[Union[int, str], Any]" + d.update(kwargs) # type: ignore[arg-type] return d @final @@ -551,7 +554,7 @@ def _construct_axes_from_arguments( @final @classmethod - def _get_axis_number(cls, axis: Axis) -> int: + def _get_axis_number(cls, axis: Axis) -> AxisInt: try: return cls._AXIS_TO_AXIS_NUMBER[axis] except KeyError: @@ -559,7 +562,7 @@ def _get_axis_number(cls, axis: Axis) -> int: @final @classmethod - def _get_axis_name(cls, axis: Axis) -> str: + def _get_axis_name(cls, axis: Axis) -> Literal["index", "columns"]: axis_number = cls._get_axis_number(axis) return cls._AXIS_ORDERS[axis_number] @@ -818,7 +821,7 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): setattr(obj, obj._get_axis_name(axis), labels) return obj - def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: + def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: labels = ensure_index(labels) self._mgr.set_axis(axis, labels) self._clear_item_cache() @@ -926,7 +929,7 @@ def pop(self, item: Hashable) -> Series | Any: return result @final - def squeeze(self, axis=None): + def squeeze(self, axis: Axis | None = None): """ Squeeze 1 dimensional axis objects into scalars. @@ -1029,10 +1032,10 @@ def squeeze(self, axis=None): >>> df_0a.squeeze() 1 """ - axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) + axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) return self.iloc[ tuple( - 0 if i in axis and len(a) == 1 else slice(None) + 0 if i in axes and len(a) == 1 else slice(None) for i, a in enumerate(self.axes) ) ] @@ -1727,7 +1730,7 @@ def _is_label_reference(self, key: Level, axis=0) -> bool_t: ) @final - def _is_label_or_level_reference(self, key: Level, axis: int = 0) -> bool_t: + def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t: """ Test whether a key is a label or level reference for a given axis. @@ -1752,7 +1755,7 @@ def _is_label_or_level_reference(self, key: Level, axis: int = 0) -> bool_t: ) @final - def _check_label_or_level_ambiguity(self, key: Level, axis: int = 0) -> None: + def _check_label_or_level_ambiguity(self, key: Level, axis: AxisInt = 0) -> None: """ Check whether `key` is ambiguous. @@ -1797,7 +1800,7 @@ def _check_label_or_level_ambiguity(self, key: Level, axis: int = 0) -> None: raise ValueError(msg) @final - def _get_label_or_level_values(self, key: Level, axis: int = 0) -> ArrayLike: + def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike: """ Return a 1-D array of values associated with `key`, a label or level from the given `axis`. @@ -1869,7 +1872,7 @@ def _get_label_or_level_values(self, key: Level, axis: int = 0) -> ArrayLike: return values @final - def _drop_labels_or_levels(self, keys, axis: int = 0): + def _drop_labels_or_levels(self, keys, axis: AxisInt = 0): """ Drop labels and/or levels for the given `axis`. @@ -4685,11 +4688,13 @@ def add_prefix(self: NDFrameT, prefix: str, axis: Axis | None = None) -> NDFrame axis_name = self._get_axis_name(axis) mapper = {axis_name: f} + # error: Incompatible return value type (got "Optional[NDFrameT]", # expected "NDFrameT") # error: Argument 1 to "rename" of "NDFrame" has incompatible type # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" - return self._rename(**mapper) # type: ignore[return-value, arg-type] + # error: Keywords must be strings + return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] @final def add_suffix(self: NDFrameT, suffix: str, axis: Axis | None = None) -> NDFrameT: @@ -4761,7 +4766,8 @@ def add_suffix(self: NDFrameT, suffix: str, axis: Axis | None = None) -> NDFrame # expected "NDFrameT") # error: Argument 1 to "rename" of "NDFrame" has incompatible type # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" - return self._rename(**mapper) # type: ignore[return-value, arg-type] + # error: Keywords must be strings + return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] @overload def sort_values( @@ -5400,7 +5406,7 @@ def filter( items=None, like: str | None = None, regex: str | None = None, - axis=None, + axis: Axis | None = None, ) -> NDFrameT: """ Subset the dataframe rows or columns according to the specified index labels. @@ -5478,7 +5484,10 @@ def filter( if items is not None: name = self._get_axis_name(axis) - return self.reindex(**{name: [r for r in items if r in labels]}) + # error: Keywords must be strings + return self.reindex( # type: ignore[misc] + **{name: [r for r in items if r in labels]} + ) elif like: def f(x) -> bool_t: @@ -8268,7 +8277,9 @@ def asfreq( ) @final - def at_time(self: NDFrameT, time, asof: bool_t = False, axis=None) -> NDFrameT: + def at_time( + self: NDFrameT, time, asof: bool_t = False, axis: Axis | None = None + ) -> NDFrameT: """ Select values at particular time of day (e.g., 9:30AM). @@ -8331,7 +8342,7 @@ def between_time( include_start: bool_t | lib.NoDefault = lib.no_default, include_end: bool_t | lib.NoDefault = lib.no_default, inclusive: IntervalClosedType | None = None, - axis=None, + axis: Axis | None = None, ) -> NDFrameT: """ Select values between particular times of the day (e.g., 9:00-9:30 AM). @@ -9495,7 +9506,7 @@ def _align_frame( self, other, join="outer", - axis=None, + axis: Axis | None = None, level=None, copy: bool_t = True, fill_value=None, @@ -9559,7 +9570,7 @@ def _align_series( self, other, join="outer", - axis=None, + axis: Axis | None = None, level=None, copy: bool_t = True, fill_value=None, @@ -9644,7 +9655,7 @@ def _where( cond, other=lib.no_default, inplace: bool_t = False, - axis=None, + axis: Axis | None = None, level=None, ): """ @@ -10329,7 +10340,11 @@ def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFra return self.shift(periods, freq, axis) def truncate( - self: NDFrameT, before=None, after=None, axis=None, copy: bool_t = True + self: NDFrameT, + before=None, + after=None, + axis: Axis | None = None, + copy: bool_t = True, ) -> NDFrameT: """ Truncate a Series or DataFrame before and after some index value. @@ -11675,7 +11690,7 @@ def all( see_also="", examples="", ) - def mad(self, axis=None, skipna: bool_t = True, level=None): + def mad(self, axis: Axis | None = None, skipna: bool_t = True, level=None): return NDFrame.mad(self, axis, skipna, level) setattr(cls, "mad", mad) @@ -11693,7 +11708,7 @@ def mad(self, axis=None, skipna: bool_t = True, level=None): ) def sem( self, - axis=None, + axis: Axis | None = None, skipna: bool_t = True, level=None, ddof=1, @@ -11716,7 +11731,7 @@ def sem( ) def var( self, - axis=None, + axis: Axis | None = None, skipna: bool_t = True, level=None, ddof=1, @@ -11740,7 +11755,7 @@ def var( ) def std( self, - axis=None, + axis: Axis | None = None, skipna: bool_t = True, level=None, ddof=1, @@ -11760,7 +11775,9 @@ def std( accum_func_name="min", examples=_cummin_examples, ) - def cummin(self, axis=None, skipna: bool_t = True, *args, **kwargs): + def cummin( + self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs + ): return NDFrame.cummin(self, axis, skipna, *args, **kwargs) setattr(cls, "cummin", cummin) @@ -11774,7 +11791,9 @@ def cummin(self, axis=None, skipna: bool_t = True, *args, **kwargs): accum_func_name="max", examples=_cummax_examples, ) - def cummax(self, axis=None, skipna: bool_t = True, *args, **kwargs): + def cummax( + self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs + ): return NDFrame.cummax(self, axis, skipna, *args, **kwargs) setattr(cls, "cummax", cummax) @@ -11788,7 +11807,9 @@ def cummax(self, axis=None, skipna: bool_t = True, *args, **kwargs): accum_func_name="sum", examples=_cumsum_examples, ) - def cumsum(self, axis=None, skipna: bool_t = True, *args, **kwargs): + def cumsum( + self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs + ): return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) setattr(cls, "cumsum", cumsum) @@ -11802,7 +11823,9 @@ def cumsum(self, axis=None, skipna: bool_t = True, *args, **kwargs): accum_func_name="prod", examples=_cumprod_examples, ) - def cumprod(self, axis=None, skipna: bool_t = True, *args, **kwargs): + def cumprod( + self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs + ): return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) setattr(cls, "cumprod", cumprod) @@ -11821,7 +11844,7 @@ def cumprod(self, axis=None, skipna: bool_t = True, *args, **kwargs): ) def sum( self, - axis=None, + axis: Axis | None = None, skipna: bool_t = True, level=None, numeric_only=None, @@ -11846,7 +11869,7 @@ def sum( ) def prod( self, - axis=None, + axis: Axis | None = None, skipna: bool_t = True, level=None, numeric_only=None, @@ -11872,7 +11895,7 @@ def prod( ) def mean( self, - axis: int | None | lib.NoDefault = lib.no_default, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, numeric_only=None, @@ -11894,7 +11917,7 @@ def mean( ) def skew( self, - axis: int | None | lib.NoDefault = lib.no_default, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, numeric_only=None, @@ -11942,7 +11965,7 @@ def kurt( ) def median( self, - axis: int | None | lib.NoDefault = lib.no_default, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, numeric_only=None, @@ -11966,7 +11989,7 @@ def median( ) def max( self, - axis: int | None | lib.NoDefault = lib.no_default, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, numeric_only=None, @@ -11990,7 +12013,7 @@ def max( ) def min( self, - axis: int | None | lib.NoDefault = lib.no_default, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, numeric_only=None, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7185db8d21cfc..6a2070eb4f46f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -37,6 +37,7 @@ from pandas._typing import ( ArrayLike, Axis, + AxisInt, FillnaOptions, IndexLabel, Level, @@ -439,7 +440,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): ) def _cython_transform( - self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + self, how: str, numeric_only: bool = True, axis: AxisInt = 0, **kwargs ): assert axis == 0 # handled by caller @@ -1306,7 +1307,7 @@ def _cython_transform( self, how: str, numeric_only: bool | lib.NoDefault = lib.no_default, - axis: int = 0, + axis: AxisInt = 0, **kwargs, ) -> DataFrame: assert axis == 0 # handled by caller diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 49761ebd6c06e..7528f0e85ef1b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -45,6 +45,7 @@ class providing the base-class of operations. import pandas._libs.groupby as libgroupby from pandas._typing import ( ArrayLike, + AxisInt, Dtype, IndexLabel, NDFrameT, @@ -644,7 +645,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): "squeeze", } - axis: int + axis: AxisInt grouper: ops.BaseGrouper keys: _KeysArgType | None = None group_keys: bool | lib.NoDefault @@ -917,7 +918,7 @@ def __init__( self, obj: NDFrameT, keys: _KeysArgType | None = None, - axis: int = 0, + axis: AxisInt = 0, level: IndexLabel | None = None, grouper: ops.BaseGrouper | None = None, exclusions: frozenset[Hashable] | None = None, @@ -1312,7 +1313,7 @@ def _wrap_applied_output( raise AbstractMethodError(self) def _resolve_numeric_only( - self, how: str, numeric_only: bool | lib.NoDefault, axis: int + self, how: str, numeric_only: bool | lib.NoDefault, axis: AxisInt ) -> bool: """ Determine subclass-specific default value for 'numeric_only'. @@ -1795,7 +1796,7 @@ def array_func(values: ArrayLike) -> ArrayLike: return res def _cython_transform( - self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + self, how: str, numeric_only: bool = True, axis: AxisInt = 0, **kwargs ): raise AbstractMethodError(self) @@ -2539,7 +2540,7 @@ def first(self, numeric_only: bool = False, min_count: int = -1): 3 6.0 3 """ - def first_compat(obj: NDFrameT, axis: int = 0): + def first_compat(obj: NDFrameT, axis: AxisInt = 0): def first(x: Series): """Helper function for first item that isn't NA.""" arr = x.array[notna(x.array)] @@ -2599,7 +2600,7 @@ def last(self, numeric_only: bool = False, min_count: int = -1): 3 6.0 3 """ - def last_compat(obj: NDFrameT, axis: int = 0): + def last_compat(obj: NDFrameT, axis: AxisInt = 0): def last(x: Series): """Helper function for last item that isn't NA.""" arr = x.array[notna(x.array)] @@ -3550,7 +3551,7 @@ def rank( ascending: bool = True, na_option: str = "keep", pct: bool = False, - axis: int = 0, + axis: AxisInt = 0, ) -> NDFrameT: """ Provide the rank of values within each group. @@ -3921,7 +3922,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): @final @Substitution(name="groupby") @Appender(_common_see_also) - def diff(self, periods: int = 1, axis: int = 0) -> NDFrameT: + def diff(self, periods: int = 1, axis: AxisInt = 0) -> NDFrameT: """ First discrete difference of element. @@ -4330,7 +4331,7 @@ def sample( def get_groupby( obj: NDFrame, by: _KeysArgType | None = None, - axis: int = 0, + axis: AxisInt = 0, level=None, grouper: ops.BaseGrouper | None = None, exclusions=None, diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 5fc713d84e842..da638ca520ffd 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -18,6 +18,7 @@ from pandas._typing import ( ArrayLike, + AxisInt, NDFrameT, npt, ) @@ -259,7 +260,7 @@ class Grouper: Freq: 17T, dtype: int64 """ - axis: int + axis: AxisInt sort: bool dropna: bool _gpr_index: Index | None @@ -280,7 +281,7 @@ def __init__( key=None, level=None, freq=None, - axis: int = 0, + axis: AxisInt = 0, sort: bool = False, dropna: bool = True, ) -> None: @@ -704,7 +705,7 @@ def groups(self) -> dict[Hashable, np.ndarray]: def get_grouper( obj: NDFrameT, key=None, - axis: int = 0, + axis: AxisInt = 0, level=None, sort: bool = True, observed: bool = False, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 597932a55f897..a97e41cbc508a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -30,6 +30,7 @@ import pandas._libs.reduction as libreduction from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, NDFrameT, Shape, @@ -675,7 +676,7 @@ def cython_operation( self, *, values: ArrayLike, - axis: int, + axis: AxisInt, min_count: int = -1, comp_ids: np.ndarray, ngroups: int, @@ -780,7 +781,7 @@ def nkeys(self) -> int: return len(self.groupings) def get_iterator( - self, data: NDFrameT, axis: int = 0 + self, data: NDFrameT, axis: AxisInt = 0 ) -> Iterator[tuple[Hashable, NDFrameT]]: """ Groupby iterator @@ -795,7 +796,7 @@ def get_iterator( yield from zip(keys, splitter) @final - def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter: + def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: """ Returns ------- @@ -826,7 +827,7 @@ def group_keys_seq(self): @final def apply( - self, f: Callable, data: DataFrame | Series, axis: int = 0 + self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0 ) -> tuple[list, bool]: mutated = self.mutated splitter = self._get_splitter(data, axis=axis) @@ -1029,7 +1030,7 @@ def _cython_operation( kind: str, values, how: str, - axis: int, + axis: AxisInt, min_count: int = -1, **kwargs, ) -> ArrayLike: @@ -1196,7 +1197,7 @@ def _get_grouper(self): """ return self - def get_iterator(self, data: NDFrame, axis: int = 0): + def get_iterator(self, data: NDFrame, axis: AxisInt = 0): """ Groupby iterator @@ -1284,7 +1285,7 @@ def _aggregate_series_fast(self, obj: Series, func: Callable) -> NoReturn: ) -def _is_indexed_like(obj, axes, axis: int) -> bool: +def _is_indexed_like(obj, axes, axis: AxisInt) -> bool: if isinstance(obj, Series): if len(axes) > 1: return False @@ -1305,7 +1306,7 @@ def __init__( data: NDFrameT, labels: npt.NDArray[np.intp], ngroups: int, - axis: int = 0, + axis: AxisInt = 0, ) -> None: self.data = data self.labels = ensure_platform_int(labels) # _should_ already be np.intp @@ -1366,7 +1367,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: def get_splitter( - data: NDFrame, labels: np.ndarray, ngroups: int, axis: int = 0 + data: NDFrame, labels: np.ndarray, ngroups: int, axis: AxisInt = 0 ) -> DataSplitter: if isinstance(data, Series): klass: type[DataSplitter] = SeriesSplitter diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 66ed828fba528..8ca47b17d1a60 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -49,6 +49,7 @@ AnyAll, ArrayLike, Axes, + AxisInt, Dtype, DtypeObj, F, @@ -1176,7 +1177,12 @@ def astype(self, dtype, copy: bool = True): @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take( - self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs + self, + indices, + axis: AxisInt = 0, + allow_fill: bool = True, + fill_value=None, + **kwargs, ): if kwargs: nv.validate_take((), kwargs) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ca92154e684b4..06021dfa93325 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -32,6 +32,7 @@ from pandas._typing import ( AnyAll, AnyArrayLike, + AxisInt, DtypeObj, F, Scalar, @@ -2165,7 +2166,7 @@ def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: def take( self: MultiIndex, indices, - axis: int = 0, + axis: AxisInt = 0, allow_fill: bool = True, fill_value=None, **kwargs, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 009c4ecc6bddd..37d211d4d3cf0 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -16,6 +16,10 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim +from pandas._typing import ( + Axis, + AxisInt, +) from pandas.errors import ( AbstractMethodError, IndexingError, @@ -655,19 +659,23 @@ def iat(self) -> _iAtIndexer: class _LocationIndexer(NDFrameIndexerBase): _valid_types: str - axis: int | None = None + axis: AxisInt | None = None # sub-classes need to set _takeable _takeable: bool @final - def __call__(self: _LocationIndexerT, axis=None) -> _LocationIndexerT: + def __call__( + self: _LocationIndexerT, axis: Axis | None = None + ) -> _LocationIndexerT: # we need to return a copy of ourselves new_self = type(self)(self.name, self.obj) if axis is not None: - axis = self.obj._get_axis_number(axis) - new_self.axis = axis + axis_int_none = self.obj._get_axis_number(axis) + else: + axis_int_none = axis + new_self.axis = axis_int_none return new_self def _get_setitem_indexer(self, key): @@ -818,7 +826,7 @@ def __setitem__(self, key, value) -> None: iloc = self if self.name == "iloc" else self.obj.iloc iloc._setitem_with_indexer(indexer, value, self.name) - def _validate_key(self, key, axis: int): + def _validate_key(self, key, axis: AxisInt): """ Ensure that key is valid for current indexer. @@ -1050,7 +1058,7 @@ def _getitem_nested_tuple(self, tup: tuple): return obj - def _convert_to_indexer(self, key, axis: int): + def _convert_to_indexer(self, key, axis: AxisInt): raise AbstractMethodError(self) @final @@ -1075,14 +1083,14 @@ def _is_scalar_access(self, key: tuple): def _getitem_tuple(self, tup: tuple): raise AbstractMethodError(self) - def _getitem_axis(self, key, axis: int): + def _getitem_axis(self, key, axis: AxisInt): raise NotImplementedError() def _has_valid_setitem_indexer(self, indexer) -> bool: raise AbstractMethodError(self) @final - def _getbool_axis(self, key, axis: int): + def _getbool_axis(self, key, axis: AxisInt): # caller is responsible for ensuring non-None axis labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) @@ -1103,7 +1111,7 @@ class _LocIndexer(_LocationIndexer): # Key Checks @doc(_LocationIndexer._validate_key) - def _validate_key(self, key, axis: int): + def _validate_key(self, key, axis: AxisInt): # valid for a collection of labels (we check their presence later) # slice of labels (where start-end in labels) # slice of integers (only if in the labels) @@ -1207,7 +1215,7 @@ def _multi_take(self, tup: tuple): # ------------------------------------------------------------------- - def _getitem_iterable(self, key, axis: int): + def _getitem_iterable(self, key, axis: AxisInt): """ Index current object with an iterable collection of keys. @@ -1252,7 +1260,7 @@ def _getitem_tuple(self, tup: tuple): return self._getitem_tuple_same_dim(tup) - def _get_label(self, label, axis: int): + def _get_label(self, label, axis: AxisInt): # GH#5567 this will fail if the label is not present in the axis. return self.obj.xs(label, axis=axis) @@ -1270,7 +1278,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: tuple): raise ek raise IndexingError("No label returned") from ek - def _getitem_axis(self, key, axis: int): + def _getitem_axis(self, key, axis: AxisInt): key = item_from_zerodim(key) if is_iterator(key): key = list(key) @@ -1308,7 +1316,7 @@ def _getitem_axis(self, key, axis: int): self._validate_key(key, axis) return self._get_label(key, axis=axis) - def _get_slice_axis(self, slice_obj: slice, axis: int): + def _get_slice_axis(self, slice_obj: slice, axis: AxisInt): """ This is pretty simple as we just have to deal with labels. """ @@ -1327,7 +1335,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): # return a DatetimeIndex instead of a slice object. return self.obj.take(indexer, axis=axis) - def _convert_to_indexer(self, key, axis: int): + def _convert_to_indexer(self, key, axis: AxisInt): """ Convert indexing key into something we can use to do actual fancy indexing on a ndarray. @@ -1400,7 +1408,7 @@ def _convert_to_indexer(self, key, axis: int): return {"key": key} raise - def _get_listlike_indexer(self, key, axis: int): + def _get_listlike_indexer(self, key, axis: AxisInt): """ Transform a list-like of keys into a new index and an indexer. @@ -1442,7 +1450,7 @@ class _iLocIndexer(_LocationIndexer): # ------------------------------------------------------------------- # Key Checks - def _validate_key(self, key, axis: int): + def _validate_key(self, key, axis: AxisInt): if com.is_bool_indexer(key): if hasattr(key, "index") and isinstance(key.index, Index): if key.index.inferred_type == "integer": @@ -1533,7 +1541,7 @@ def _is_scalar_access(self, key: tuple) -> bool: return all(is_integer(k) for k in key) - def _validate_integer(self, key: int, axis: int) -> None: + def _validate_integer(self, key: int, axis: AxisInt) -> None: """ Check that 'key' is a valid position in the desired axis. @@ -1563,7 +1571,7 @@ def _getitem_tuple(self, tup: tuple): return self._getitem_tuple_same_dim(tup) - def _get_list_axis(self, key, axis: int): + def _get_list_axis(self, key, axis: AxisInt): """ Return Series values by list or array of integers. @@ -1586,7 +1594,7 @@ def _get_list_axis(self, key, axis: int): # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") from err - def _getitem_axis(self, key, axis: int): + def _getitem_axis(self, key, axis: AxisInt): if key is Ellipsis: key = slice(None) elif isinstance(key, ABCDataFrame): @@ -1623,7 +1631,7 @@ def _getitem_axis(self, key, axis: int): return self.obj._ixs(key, axis=axis) - def _get_slice_axis(self, slice_obj: slice, axis: int): + def _get_slice_axis(self, slice_obj: slice, axis: AxisInt): # caller is responsible for ensuring non-None axis obj = self.obj @@ -1634,7 +1642,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): labels._validate_positional_slice(slice_obj) return self.obj._slice(slice_obj, axis=axis) - def _convert_to_indexer(self, key, axis: int): + def _convert_to_indexer(self, key, axis: AxisInt): """ Much simpler as we only have to deal with our valid types. """ @@ -2468,7 +2476,7 @@ def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]: return tuple(_tup) -def _tupleize_axis_indexer(ndim: int, axis: int, key) -> tuple: +def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple: """ If we have an axis, adapt the given key to be axis-independent. """ diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 45318a0bcd7f2..d2c626a9cf247 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -21,6 +21,7 @@ ) from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, npt, ) @@ -160,12 +161,12 @@ def shape_proper(self) -> tuple[int, ...]: return tuple(len(ax) for ax in self._axes) @staticmethod - def _normalize_axis(axis: int) -> int: + def _normalize_axis(axis: AxisInt) -> int: # switch axis axis = 1 if axis == 0 else 0 return axis - def set_axis(self, axis: int, new_labels: Index) -> None: + def set_axis(self, axis: AxisInt, new_labels: Index) -> None: # Caller is responsible for ensuring we have an Index object. self._validate_set_axis(axis, new_labels) axis = self._normalize_axis(axis) @@ -355,14 +356,14 @@ def putmask(self: T, mask, new, align: bool = True) -> T: new=new, ) - def diff(self: T, n: int, axis: int) -> T: + def diff(self: T, n: int, axis: AxisInt) -> T: assert self.ndim == 2 and axis == 0 # caller ensures return self.apply(algos.diff, n=n, axis=axis) def interpolate(self: T, **kwargs) -> T: return self.apply_with_block("interpolate", swap_axis=False, **kwargs) - def shift(self: T, periods: int, axis: int, fill_value) -> T: + def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T: if fill_value is lib.no_default: fill_value = None @@ -542,7 +543,7 @@ def reindex_indexer( self: T, new_axis, indexer, - axis: int, + axis: AxisInt, fill_value=None, allow_dups: bool = False, copy: bool = True, @@ -566,7 +567,7 @@ def _reindex_indexer( self: T, new_axis, indexer: npt.NDArray[np.intp] | None, - axis: int, + axis: AxisInt, fill_value=None, allow_dups: bool = False, copy: bool = True, @@ -644,7 +645,7 @@ def _reindex_indexer( def take( self: T, indexer, - axis: int = 1, + axis: AxisInt = 1, verify: bool = True, convert_indices: bool = True, ) -> T: @@ -778,7 +779,7 @@ def fast_xs(self, loc: int) -> SingleArrayManager: result = np.array(values, dtype=dtype) return SingleArrayManager([result], [self._axes[1]]) - def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: + def get_slice(self, slobj: slice, axis: AxisInt = 0) -> ArrayManager: axis = self._normalize_axis(axis) if axis == 0: @@ -1054,7 +1055,7 @@ def quantile( self, *, qs: Float64Index, - axis: int = 0, + axis: AxisInt = 0, transposed: bool = False, interpolation="linear", ) -> ArrayManager: @@ -1284,7 +1285,7 @@ def is_single_block(self) -> bool: def fast_xs(self, loc: int) -> SingleArrayManager: raise NotImplementedError("Use series._values[loc] instead") - def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager: + def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager: if axis >= self.ndim: raise IndexError("Requested axis not found in manager") diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index ddc4495318568..5b6dcba2371d9 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -14,6 +14,7 @@ from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, Shape, ) @@ -56,7 +57,7 @@ def shape(self) -> Shape: return tuple(len(ax) for ax in self.axes) @final - def _validate_set_axis(self, axis: int, new_labels: Index) -> None: + def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None: # Caller is responsible for ensuring we have an Index object. old_len = len(self.axes[axis]) new_len = len(new_labels) @@ -76,7 +77,7 @@ def reindex_indexer( self: T, new_axis, indexer, - axis: int, + axis: AxisInt, fill_value=None, allow_dups: bool = False, copy: bool = True, @@ -88,7 +89,7 @@ def reindex_indexer( def reindex_axis( self: T, new_index: Index, - axis: int, + axis: AxisInt, fill_value=None, only_slice: bool = False, ) -> T: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 824e645977e2c..432431c89b334 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -26,6 +26,7 @@ from pandas._libs.tslibs import IncompatibleFrequency from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, F, FillnaOptions, @@ -861,7 +862,7 @@ def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: def take_nd( self, indexer: npt.NDArray[np.intp], - axis: int, + axis: AxisInt, new_mgr_locs: BlockPlacement | None = None, fill_value=lib.no_default, ) -> Block: @@ -1214,7 +1215,7 @@ def fillna( def interpolate( self, method: str = "pad", - axis: int = 0, + axis: AxisInt = 0, index: Index | None = None, inplace: bool = False, limit: int | None = None, @@ -1275,13 +1276,15 @@ def interpolate( nb = self.make_block_same_class(data) return nb._maybe_downcast([nb], downcast) - def diff(self, n: int, axis: int = 1) -> list[Block]: + def diff(self, n: int, axis: AxisInt = 1) -> list[Block]: """return block for the diff of the values""" # only reached with ndim == 2 and axis == 1 new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values)] - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + def shift( + self, periods: int, axis: AxisInt = 0, fill_value: Any = None + ) -> list[Block]: """shift the block by periods, possibly upcast""" # convert integer to float if necessary. need to do a lot more than # that, handle boolean etc also @@ -1315,7 +1318,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo @final def quantile( - self, qs: Float64Index, interpolation="linear", axis: int = 0 + self, qs: Float64Index, interpolation="linear", axis: AxisInt = 0 ) -> Block: """ compute the quantiles of the @@ -1816,13 +1819,15 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock: new_values = self.values[slicer] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) - def diff(self, n: int, axis: int = 1) -> list[Block]: + def diff(self, n: int, axis: AxisInt = 1) -> list[Block]: # only reached with ndim == 2 and axis == 1 # TODO(EA2D): Can share with NDArrayBackedExtensionBlock new_values = algos.diff(self.values, n, axis=0) return [self.make_block(values=new_values)] - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + def shift( + self, periods: int, axis: AxisInt = 0, fill_value: Any = None + ) -> list[Block]: """ Shift the block by `periods`. @@ -1924,7 +1929,7 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._ndarray.base is not None - def diff(self, n: int, axis: int = 0) -> list[Block]: + def diff(self, n: int, axis: AxisInt = 0) -> list[Block]: """ 1st discrete difference. @@ -1950,7 +1955,9 @@ def diff(self, n: int, axis: int = 0) -> list[Block]: new_values = values - values.shift(n, axis=axis) return [self.make_block(new_values)] - def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]: + def shift( + self, periods: int, axis: AxisInt = 0, fill_value: Any = None + ) -> list[Block]: values = self.values new_values = values.shift(periods, fill_value=fill_value, axis=axis) return [self.make_block_same_class(new_values)] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 2224c6e36678e..c8ad7dd328edf 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -17,6 +17,7 @@ from pandas._libs.missing import NA from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, Manager, Shape, @@ -70,7 +71,7 @@ def _concatenate_array_managers( - mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool ) -> Manager: """ Concatenate array managers into one. @@ -174,7 +175,7 @@ def concat_arrays(to_concat: list) -> ArrayLike: def concatenate_managers( - mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool + mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool ) -> Manager: """ Concatenate block managers into one. @@ -525,7 +526,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: def _concatenate_join_units( - join_units: list[JoinUnit], concat_axis: int, copy: bool + join_units: list[JoinUnit], concat_axis: AxisInt, copy: bool ) -> ArrayLike: """ Concatenate values from several join units along selected axis. @@ -702,7 +703,7 @@ def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit: return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape) -def _combine_concat_plans(plans, concat_axis: int): +def _combine_concat_plans(plans, concat_axis: AxisInt): """ Combine multiple concatenation plans into one. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9b78d443c11de..c6762b72f82d7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -26,6 +26,7 @@ from pandas._libs.internals import BlockPlacement from pandas._typing import ( ArrayLike, + AxisInt, DtypeObj, Shape, npt, @@ -217,13 +218,13 @@ def __nonzero__(self) -> bool: # Python3 compat __bool__ = __nonzero__ - def _normalize_axis(self, axis: int) -> int: + def _normalize_axis(self, axis: AxisInt) -> int: # switch axis to follow BlockManager logic if self.ndim == 2: axis = 1 if axis == 0 else 0 return axis - def set_axis(self, axis: int, new_labels: Index) -> None: + def set_axis(self, axis: AxisInt, new_labels: Index) -> None: # Caller is responsible for ensuring we have an Index object. self._validate_set_axis(axis, new_labels) self.axes[axis] = new_labels @@ -412,7 +413,7 @@ def putmask(self, mask, new, align: bool = True): new=new, ) - def diff(self: T, n: int, axis: int) -> T: + def diff(self: T, n: int, axis: AxisInt) -> T: # only reached with self.ndim == 2 and axis == 1 axis = self._normalize_axis(axis) return self.apply("diff", n=n, axis=axis) @@ -420,7 +421,7 @@ def diff(self: T, n: int, axis: int) -> T: def interpolate(self: T, **kwargs) -> T: return self.apply("interpolate", **kwargs) - def shift(self: T, periods: int, axis: int, fill_value) -> T: + def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T: axis = self._normalize_axis(axis) if fill_value is lib.no_default: fill_value = None @@ -684,7 +685,7 @@ def reindex_indexer( self: T, new_axis: Index, indexer: npt.NDArray[np.intp] | None, - axis: int, + axis: AxisInt, fill_value=None, allow_dups: bool = False, copy: bool | None = True, @@ -939,7 +940,7 @@ def _make_na_block( def take( self: T, indexer, - axis: int = 1, + axis: AxisInt = 1, verify: bool = True, convert_indices: bool = True, ) -> T: @@ -1580,7 +1581,7 @@ def quantile( self: T, *, qs: Float64Index, - axis: int = 0, + axis: AxisInt = 0, interpolation="linear", ) -> T: """ @@ -2011,7 +2012,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana ref = weakref.ref(blk) return type(self)(block, new_idx, [ref]) - def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: + def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: # Assertion disabled for performance # assert isinstance(slobj, slice), type(slobj) if axis >= self.ndim: diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ac44d6e80adc1..242ee43b32393 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -22,6 +22,7 @@ from pandas._typing import ( ArrayLike, Axis, + AxisInt, F, npt, ) @@ -209,7 +210,7 @@ def find_valid_index(values, *, how: str) -> int | None: def interpolate_array_2d( data: np.ndarray, method: str = "pad", - axis: int = 0, + axis: AxisInt = 0, index: Index | None = None, limit: int | None = None, limit_direction: str = "forward", @@ -263,7 +264,7 @@ def interpolate_array_2d( def _interpolate_2d_with_fill( data: np.ndarray, # floating dtype index: Index, - axis: int, + axis: AxisInt, method: str = "linear", limit: int | None = None, limit_direction: str = "forward", diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8566d901e0b03..e61970b1d7dcb 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -22,6 +22,7 @@ ) from pandas._typing import ( ArrayLike, + AxisInt, Dtype, DtypeObj, F, @@ -120,7 +121,7 @@ def __call__(self, alt: F) -> F: def f( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, **kwds, ): @@ -404,7 +405,7 @@ def _datetimelike_compat(func: F) -> F: def new_func( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, **kwargs, @@ -428,7 +429,7 @@ def new_func( return cast(F, new_func) -def _na_for_min_count(values: np.ndarray, axis: int | None) -> Scalar | np.ndarray: +def _na_for_min_count(values: np.ndarray, axis: AxisInt | None) -> Scalar | np.ndarray: """ Return the missing value for `values`. @@ -467,7 +468,7 @@ def maybe_operate_rowwise(func: F) -> F: """ @functools.wraps(func) - def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): + def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs): if ( axis == 1 and values.ndim == 2 @@ -496,7 +497,7 @@ def newfunc(values: np.ndarray, *, axis: int | None = None, **kwargs): def nanany( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> bool: @@ -542,7 +543,7 @@ def nanany( def nanall( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> bool: @@ -591,7 +592,7 @@ def nanall( def nansum( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, min_count: int = 0, mask: npt.NDArray[np.bool_] | None = None, @@ -636,7 +637,7 @@ def nansum( def _mask_datetimelike_result( result: np.ndarray | np.datetime64 | np.timedelta64, - axis: int | None, + axis: AxisInt | None, mask: npt.NDArray[np.bool_], orig_values: np.ndarray, ) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType: @@ -659,7 +660,7 @@ def _mask_datetimelike_result( def nanmean( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> float: @@ -720,7 +721,7 @@ def nanmean( @bottleneck_switch() -def nanmedian(values, *, axis=None, skipna: bool = True, mask=None): +def nanmedian(values, *, axis: AxisInt | None = None, skipna: bool = True, mask=None): """ Parameters ---------- @@ -796,7 +797,7 @@ def get_median(x): def get_empty_reduction_result( shape: tuple[int, ...], - axis: int, + axis: AxisInt, dtype: np.dtype | type[np.floating], fill_value: Any, ) -> np.ndarray: @@ -824,7 +825,7 @@ def get_empty_reduction_result( def _get_counts_nanvar( values_shape: Shape, mask: npt.NDArray[np.bool_] | None, - axis: int | None, + axis: AxisInt | None, ddof: int, dtype: np.dtype = np.dtype(np.float64), ) -> tuple[float | np.ndarray, float | np.ndarray]: @@ -869,7 +870,9 @@ def _get_counts_nanvar( @bottleneck_switch(ddof=1) -def nanstd(values, *, axis=None, skipna: bool = True, ddof=1, mask=None): +def nanstd( + values, *, axis: AxisInt | None = None, skipna: bool = True, ddof=1, mask=None +): """ Compute the standard deviation along given axis while ignoring NaNs @@ -909,7 +912,9 @@ def nanstd(values, *, axis=None, skipna: bool = True, ddof=1, mask=None): @disallow("M8", "m8") @bottleneck_switch(ddof=1) -def nanvar(values, *, axis=None, skipna: bool = True, ddof=1, mask=None): +def nanvar( + values, *, axis: AxisInt | None = None, skipna: bool = True, ddof=1, mask=None +): """ Compute the variance along given axis while ignoring NaNs @@ -980,7 +985,7 @@ def nanvar(values, *, axis=None, skipna: bool = True, ddof=1, mask=None): def nansem( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, ddof: int = 1, mask: npt.NDArray[np.bool_] | None = None, @@ -1032,7 +1037,7 @@ def _nanminmax(meth, fill_value_typ): def reduction( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> Dtype: @@ -1064,7 +1069,7 @@ def reduction( def nanargmax( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> int | np.ndarray: @@ -1110,7 +1115,7 @@ def nanargmax( def nanargmin( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> int | np.ndarray: @@ -1157,7 +1162,7 @@ def nanargmin( def nanskew( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> float: @@ -1245,7 +1250,7 @@ def nanskew( def nankurt( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, mask: npt.NDArray[np.bool_] | None = None, ) -> float: @@ -1342,7 +1347,7 @@ def nankurt( def nanprod( values: np.ndarray, *, - axis: int | None = None, + axis: AxisInt | None = None, skipna: bool = True, min_count: int = 0, mask: npt.NDArray[np.bool_] | None = None, @@ -1384,7 +1389,7 @@ def nanprod( def _maybe_arg_null_out( result: np.ndarray, - axis: int | None, + axis: AxisInt | None, mask: npt.NDArray[np.bool_] | None, skipna: bool, ) -> np.ndarray | int: @@ -1412,7 +1417,7 @@ def _maybe_arg_null_out( def _get_counts( values_shape: Shape, mask: npt.NDArray[np.bool_] | None, - axis: int | None, + axis: AxisInt | None, dtype: np.dtype = np.dtype(np.float64), ) -> float | np.ndarray: """ @@ -1452,7 +1457,7 @@ def _get_counts( def _maybe_null_out( result: np.ndarray | float | NaTType, - axis: int | None, + axis: AxisInt | None, mask: npt.NDArray[np.bool_] | None, shape: tuple[int, ...], min_count: int = 1, diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index dde4d07b7915c..c2a76b9a9ae19 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -13,7 +13,10 @@ import numpy as np from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op -from pandas._typing import Level +from pandas._typing import ( + AxisInt, + Level, +) from pandas.util._decorators import Appender from pandas.util._exceptions import find_stack_level @@ -386,7 +389,7 @@ def frame_arith_method_with_reindex(left: DataFrame, right: DataFrame, op) -> Da return result -def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: int): +def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt): """ If the Series operand is not EA-dtype, we can broadcast to 2D and operate blockwise. diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2f4b0416007da..052add13efcd8 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -27,6 +27,7 @@ to_offset, ) from pandas._typing import ( + AxisInt, IndexLabel, NDFrameT, T, @@ -147,7 +148,7 @@ def __init__( self, obj: DataFrame | Series, groupby: TimeGrouper, - axis: int = 0, + axis: AxisInt = 0, kind=None, *, group_keys: bool | lib.NoDefault = lib.no_default, @@ -1922,7 +1923,7 @@ def _get_period_bins(self, ax: PeriodIndex): def _take_new_index( - obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: int = 0 + obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0 ) -> NDFrameT: if isinstance(obj, ABCSeries): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f67bb8eac5da8..7ec91b23c54f2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -28,6 +28,7 @@ from pandas._typing import ( AnyArrayLike, ArrayLike, + AxisInt, DtypeObj, IndexLabel, Shape, @@ -619,8 +620,8 @@ class _MergeOperation: right_on: Sequence[Hashable | AnyArrayLike] left_index: bool right_index: bool - axis: int - bm_axis: int + axis: AxisInt + bm_axis: AxisInt sort: bool suffixes: Suffixes copy: bool @@ -638,7 +639,7 @@ def __init__( on: IndexLabel | None = None, left_on: IndexLabel | None = None, right_on: IndexLabel | None = None, - axis: int = 1, + axis: AxisInt = 1, left_index: bool = False, right_index: bool = False, sort: bool = True, @@ -1755,7 +1756,7 @@ def __init__( right_on: IndexLabel | None = None, left_index: bool = False, right_index: bool = False, - axis: int = 1, + axis: AxisInt = 1, suffixes: Suffixes = ("_x", "_y"), fill_method: str | None = None, how: str = "outer", @@ -1844,7 +1845,7 @@ def __init__( by=None, left_by=None, right_by=None, - axis: int = 1, + axis: AxisInt = 1, suffixes: Suffixes = ("_x", "_y"), copy: bool = True, fill_method: str | None = None, diff --git a/pandas/core/sample.py b/pandas/core/sample.py index 16fca2d0ff1b4..a9b236b58a9ba 100644 --- a/pandas/core/sample.py +++ b/pandas/core/sample.py @@ -8,6 +8,7 @@ import numpy as np from pandas._libs import lib +from pandas._typing import AxisInt from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -18,7 +19,7 @@ from pandas.core.generic import NDFrame -def preprocess_weights(obj: NDFrame, weights, axis: int) -> np.ndarray: +def preprocess_weights(obj: NDFrame, weights, axis: AxisInt) -> np.ndarray: """ Process and validate the `weights` argument to `NDFrame.sample` and `.GroupBy.sample`. diff --git a/pandas/core/series.py b/pandas/core/series.py index f2f929418718e..77a978647be31 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -39,6 +39,7 @@ AnyArrayLike, ArrayLike, Axis, + AxisInt, Dtype, DtypeObj, FilePath, @@ -214,8 +215,11 @@ def wrapper(self): # ---------------------------------------------------------------------- # Series class - -class Series(base.IndexOpsMixin, NDFrame): +# error: Definition of "max" in base class "IndexOpsMixin" is incompatible with +# definition in base class "NDFrame" +# error: Definition of "min" in base class "IndexOpsMixin" is incompatible with +# definition in base class "NDFrame" +class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] """ One-dimensional ndarray with axis labels (including time series). @@ -564,7 +568,7 @@ def _constructor_expanddim(self) -> Callable[..., DataFrame]: def _can_hold_na(self) -> bool: return self._mgr._can_hold_na - def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: + def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: """ Override generic, we want to set the _typ here. @@ -946,7 +950,7 @@ def _take_with_is_copy(self, indices, axis=0) -> Series: """ return self.take(indices=indices, axis=axis) - def _ixs(self, i: int, axis: int = 0) -> Any: + def _ixs(self, i: int, axis: AxisInt = 0) -> Any: """ Return the i-th value or values in the Series by location. @@ -960,7 +964,7 @@ def _ixs(self, i: int, axis: int = 0) -> Any: """ return self._values[i] - def _slice(self, slobj: slice, axis: int = 0) -> Series: + def _slice(self, slobj: slice, axis: AxisInt = 0) -> Series: # axis kwarg is retained for compat with NDFrame method # _slice is *always* positional return self._get_values(slobj) @@ -2496,7 +2500,9 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab >>> s.idxmin(skipna=False) nan """ - i = self.argmin(axis, skipna, *args, **kwargs) + # error: Argument 1 to "argmin" of "IndexOpsMixin" has incompatible type "Union + # [int, Literal['index', 'columns']]"; expected "Optional[int]" + i = self.argmin(axis, skipna, *args, **kwargs) # type: ignore[arg-type] if i == -1: return np.nan return self.index[i] @@ -2565,7 +2571,9 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab >>> s.idxmax(skipna=False) nan """ - i = self.argmax(axis, skipna, *args, **kwargs) + # error: Argument 1 to "argmax" of "IndexOpsMixin" has incompatible type + # "Union[int, Literal['index', 'columns']]"; expected "Optional[int]" + i = self.argmax(axis, skipna, *args, **kwargs) # type: ignore[arg-type] if i == -1: return np.nan return self.index[i] @@ -6220,10 +6228,10 @@ def mask( # type: ignore[override] # ---------------------------------------------------------------------- # Add index - _AXIS_ORDERS = ["index"] + _AXIS_ORDERS: list[Literal["index", "columns"]] = ["index"] _AXIS_LEN = len(_AXIS_ORDERS) _info_axis_number = 0 - _info_axis_name = "index" + _info_axis_name: Literal["index"] = "index" index = properties.AxisProperty( axis=0, doc="The index (axis labels) of the Series." diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 29aa0761f89c9..2b386164028b7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -22,6 +22,7 @@ ) from pandas._libs.hashtable import unique_label_indices from pandas._typing import ( + AxisInt, IndexKeyFunc, Level, NaPosition, @@ -447,7 +448,7 @@ def nargsort( return ensure_platform_int(indexer) -def nargminmax(values: ExtensionArray, method: str, axis: int = 0): +def nargminmax(values: ExtensionArray, method: str, axis: AxisInt = 0): """ Implementation of np.argmin/argmax but for ExtensionArray and which handles missing values. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4cda523987020..be8f0e22a5174 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -25,6 +25,7 @@ from pandas._libs import lib from pandas._typing import ( Axis, + AxisInt, FilePath, IndexLabel, Level, @@ -1582,7 +1583,7 @@ def _update_ctx(self, attrs: DataFrame) -> None: i = self.index.get_loc(rn) self.ctx[(i, j)].extend(css_list) - def _update_ctx_header(self, attrs: DataFrame, axis: int) -> None: + def _update_ctx_header(self, attrs: DataFrame, axis: AxisInt) -> None: """ Update the state of the ``Styler`` for header cells. @@ -1854,7 +1855,7 @@ def apply( def _apply_index( self, func: Callable, - axis: int | str = 0, + axis: Axis = 0, level: Level | list[Level] | None = None, method: str = "apply", **kwargs, @@ -1888,7 +1889,7 @@ def _apply_index( def apply_index( self, func: Callable, - axis: int | str = 0, + axis: AxisInt | str = 0, level: Level | list[Level] | None = None, **kwargs, ) -> Styler: @@ -1973,7 +1974,7 @@ def apply_index( def applymap_index( self, func: Callable, - axis: int | str = 0, + axis: AxisInt | str = 0, level: Level | list[Level] | None = None, **kwargs, ) -> Styler: @@ -2500,7 +2501,7 @@ def set_sticky( def set_table_styles( self, table_styles: dict[Any, CSSStyles] | CSSStyles | None = None, - axis: int = 0, + axis: AxisInt = 0, overwrite: bool = True, css_class_names: dict[str, str] | None = None, ) -> Styler: diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 07a09677caf13..dd56928d3a496 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1157,7 +1157,7 @@ def format( def format_index( self, formatter: ExtFormatter | None = None, - axis: int | str = 0, + axis: Axis = 0, level: Level | list[Level] | None = None, na_rep: str | None = None, precision: int | None = None, diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 701f72f605989..9b8364c449e36 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -190,7 +190,7 @@ class Writer(ABC): def __init__( self, - obj, + obj: NDFrame, orient: str | None, date_format: str, double_precision: int, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f3f3778c1fcbe..75cbf57f82644 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -45,6 +45,7 @@ from pandas._typing import ( AnyArrayLike, ArrayLike, + AxisInt, DtypeArg, FilePath, Shape, @@ -4809,7 +4810,9 @@ def read( return df -def _reindex_axis(obj: DataFrame, axis: int, labels: Index, other=None) -> DataFrame: +def _reindex_axis( + obj: DataFrame, axis: AxisInt, labels: Index, other=None +) -> DataFrame: ax = obj._get_axis(axis) labels = ensure_index(labels) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index a1603ea3dc17a..70115f8679337 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pandas._typing import AxisInt + from pandas import ( DataFrame, concat, @@ -39,7 +41,7 @@ def _check_mixed_int(df, dtype=None): assert df.dtypes["D"] == dtypes["D"] -def zip_frames(frames: list[DataFrame], axis: int = 1) -> DataFrame: +def zip_frames(frames: list[DataFrame], axis: AxisInt = 1) -> DataFrame: """ take a list of frames, zip them together under the assumption that these all have the first frames' index/columns.