diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 669bfe08d42b0..fb664e4ed657e 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -849,8 +849,8 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
 
 
 def infer_dtype_from_array(
-    arr, pandas_dtype: bool = False
-) -> Tuple[DtypeObj, ArrayLike]:
+    arr: AnyArrayLike, pandas_dtype: bool = False
+) -> Tuple[DtypeObj, Union[ArrayLike, Series]]:
     """
     Infer the dtype from an array.
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 9ae5f7d1b7497..68cc9ca6c01d3 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -7,9 +7,12 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Callable,
     List,
     Optional,
+    Sequence,
     Set,
+    Tuple,
     Union,
 )
 
@@ -20,9 +23,11 @@
     lib,
 )
 
 from pandas._typing import (
+    AnyArrayLike,
     ArrayLike,
-    Axis,
     DtypeObj,
+    IndexLabel,
+    Scalar,
 )
 from pandas.compat._optional import import_optional_dependency
 
@@ -39,7 +44,9 @@
     from pandas import Index
 
 
-def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
+def mask_missing(
+    arr: AnyArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]]
+) -> np.ndarray:
     """
     Return a masking array of same size/shape as arr
     with entries equaling any member of values_to_mask set to True
@@ -77,7 +84,9 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
     return mask
 
 
-def clean_fill_method(method, allow_nearest: bool = False):
+def clean_fill_method(
+    method: Optional[str], allow_nearest: bool = False
+) -> Optional[str]:
     # asfreq is compat for resampling
     if method in [None, "asfreq"]:
         return None
@@ -136,7 +145,7 @@ def clean_interp_method(method: str, **kwargs) -> str:
     return method
 
 
-def find_valid_index(values, how: str):
+def find_valid_index(values: ArrayLike, how: str) -> Optional[int]:
     """
     Retrieves the index of the first valid value.
 
@@ -176,7 +185,7 @@ def find_valid_index(values, how: str):
 def interpolate_1d(
     xvalues: Index,
     yvalues: np.ndarray,
-    method: Optional[str] = "linear",
+    method: str = "linear",
     limit: Optional[int] = None,
     limit_direction: str = "forward",
     limit_area: Optional[str] = None,
@@ -184,7 +193,7 @@ def interpolate_1d(
     bounds_error: bool = False,
     order: Optional[int] = None,
     **kwargs,
-):
+) -> np.ndarray:
     """
     Logic for the 1-d interpolation. The result should be 1-d, inputs
     xvalues and yvalues will each be 1-d arrays of the same length.
@@ -234,8 +243,13 @@ def interpolate_1d(
 
     # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
     all_nans = set(np.flatnonzero(invalid))
-    start_nans = set(range(find_valid_index(yvalues, "first")))
-    end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
+
+    start_nan_idx = find_valid_index(yvalues, "first")
+    start_nans = set() if start_nan_idx is None else set(range(start_nan_idx))
+
+    end_nan_idx = find_valid_index(yvalues, "last")
+    end_nans = set() if end_nan_idx is None else set(range(1 + end_nan_idx, len(valid)))
+
     mid_nans = all_nans - start_nans - end_nans
 
     # Like the sets above, preserve_nans contains indices of invalid values,
@@ -308,8 +322,15 @@ def interpolate_1d(
 
 
 def _interpolate_scipy_wrapper(
-    x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs
-):
+    x: np.ndarray,
+    y: np.ndarray,
+    new_x: Union[Scalar, np.ndarray],
+    method: str,
+    fill_value: Optional[Scalar] = None,
+    bounds_error: bool = False,
+    order: Optional[int] = None,
+    **kwargs,
+) -> np.ndarray:
     """
     Passed off to scipy.interpolate.interp1d. method is scipy's kind.
     Returns an array interpolated at new_x. Add any new methods to
@@ -349,15 +370,14 @@ def _interpolate_scipy_wrapper(
         "polynomial",
     ]
     if method in interp1d_methods:
-        if method == "polynomial":
-            method = order
+        kind = order if method == "polynomial" else method
         terp = interpolate.interp1d(
-            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
+            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
         )
         new_y = terp(new_x)
     elif method == "spline":
         # GH #10633, #24014
-        if isna(order) or (order <= 0):
+        if order is None or isna(order) or order <= 0:
             raise ValueError(
                 f"order needs to be specified and greater than 0; got order: {order}"
             )
@@ -372,12 +392,23 @@ def _interpolate_scipy_wrapper(
             y = y.copy()
         if not new_x.flags.writeable:
             new_x = new_x.copy()
-        method = alt_methods[method]
-        new_y = method(x, y, new_x, **kwargs)
+
+        if isinstance(method, str):
+            alt_method = alt_methods[method]
+            new_y = alt_method(x, y, new_x, **kwargs)
+        else:
+            raise ValueError(f"{method} is not a valid interp method")
     return new_y
 
 
-def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
+def _from_derivatives(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    order: Optional[Union[int, List[int]]] = None,
+    der: Union[int, List[int]] = 0,
+    extrapolate: bool = False,
+) -> np.ndarray:
     """
     Convenience function for interpolate.BPoly.from_derivatives.
 
@@ -390,15 +421,16 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
         sorted 1D array of x-coordinates
     yi : array_like or list of array-likes
         yi[i][j] is the j-th derivative known at xi[i]
-    order: None or int or array_like of ints. Default: None.
+    x : scalar or array_like
+    order: None or int or array_like of ints, default: None
         Specifies the degree of local polynomials. If not None, some
         derivatives are ignored.
-    der : int or list
+    der : int or list, default: 0
         How many derivatives to extract; None for all potentially
         nonzero derivatives (that is a number equal to the number of
        points), or a list of derivatives to extract. This number
        includes the function value as 0th derivative.
-    extrapolate : bool, optional
+    extrapolate : bool, default False
         Whether to extrapolate to ouf-of-bounds points based on first
         and last intervals, or to return NaNs. Default: True.
 
@@ -420,7 +452,13 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
     return m(x)
 
 
-def _akima_interpolate(xi, yi, x, der=0, axis=0):
+def _akima_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    der: int = 0,
+    axis: int = 0,
+) -> Union[Scalar, ArrayLike]:
     """
     Convenience function for akima interpolation.
     xi and yi are arrays of values used to approximate some function f,
@@ -430,13 +468,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
 
     Parameters
     ----------
-    xi : array_like
+    xi : np.ndarray
         A sorted list of x-coordinates, of length N.
-    yi : array_like
+    yi : np.ndarray
         A 1-D array of real values. `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
-    x : scalar or array_like
+    x : array_like
         Of length M.
     der : int, optional
         How many derivatives to extract; None for all potentially
@@ -463,7 +501,14 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
     return P(x, nu=der)
 
 
-def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
+def _cubicspline_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: np.ndarray,
+    axis: int = 0,
+    bc_type: Union[str, Tuple] = "not-a-knot",
+    extrapolate: Optional[Union[bool, str]] = None,
+) -> Union[ArrayLike, Scalar]:
     """
     Convenience function for cubic spline data interpolator.
 
@@ -478,7 +523,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
         Array containing values of the dependent variable. It can have
         arbitrary number of dimensions, but the length along ``axis``
         (see below) must match the length of ``x``. Values must be finite.
-    x : scalar or array_like, shape (m,)
+    x : array_like, shape (m,)
     axis : int, optional
         Axis along which `y` is assumed to be varying. Meaning that for
         ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
@@ -571,6 +616,8 @@ def _interpolate_with_limit_area(
     first = find_valid_index(values, "first")
     last = find_valid_index(values, "last")
 
+    assert first is not None and last is not None
+
     values = interpolate_2d(
         values,
         method=method,
@@ -588,12 +635,12 @@
 
 
 def interpolate_2d(
-    values,
+    values: np.ndarray,
     method: str = "pad",
-    axis: Axis = 0,
+    axis: int = 0,
     limit: Optional[int] = None,
     limit_area: Optional[str] = None,
-):
+) -> np.ndarray:
     """
     Perform an actual interpolation of values, values will be make 2-d if
     needed fills inplace, returns the result.
@@ -639,7 +686,10 @@ def interpolate_2d(
             raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
         values = values.reshape(tuple((1,) + values.shape))
 
-    method = clean_fill_method(method)
+    method_cleaned = clean_fill_method(method)
+    assert isinstance(method_cleaned, str)
+    method = method_cleaned
+
     tvalues = transf(values)
     if method == "pad":
         result = _pad_2d(tvalues, limit=limit)
@@ -658,7 +708,9 @@ def interpolate_2d(
     return result
 
 
-def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
+def _cast_values_for_fillna(
+    values: ArrayLike, dtype: DtypeObj, has_mask: bool
+) -> ArrayLike:
     """
     Cast values to a dtype that algos.pad and algos.backfill can handle.
     """
@@ -677,34 +729,41 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
     return values
 
 
-def _fillna_prep(values, mask=None):
+def _fillna_prep(
+    values: np.ndarray, mask: Optional[np.ndarray] = None
+) -> Tuple[np.ndarray, np.ndarray]:
     # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
-    dtype = values.dtype
 
     has_mask = mask is not None
-    if not has_mask:
-        # This needs to occur before datetime/timedeltas are cast to int64
-        mask = isna(values)
 
-    values = _cast_values_for_fillna(values, dtype, has_mask)
+    # This needs to occur before datetime/timedeltas are cast to int64
+    mask = isna(values) if mask is None else mask
+    values = _cast_values_for_fillna(values, values.dtype, has_mask)
 
     mask = mask.view(np.uint8)
+
     return values, mask
 
 
-def _pad_1d(values, limit=None, mask=None):
+def _pad_1d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
     algos.pad_inplace(values, mask, limit=limit)
     return values
 
 
-def _backfill_1d(values, limit=None, mask=None):
+def _backfill_1d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
     algos.backfill_inplace(values, mask, limit=limit)
     return values
 
 
-def _pad_2d(values, limit=None, mask=None):
+def _pad_2d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
 
     if np.all(values.shape):
@@ -715,7 +774,9 @@ def _pad_2d(values, limit=None, mask=None):
     return values
 
 
-def _backfill_2d(values, limit=None, mask=None):
+def _backfill_2d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
 
     if np.all(values.shape):
@@ -729,16 +790,19 @@ def _backfill_2d(values, limit=None, mask=None):
 _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
 
 
-def get_fill_func(method):
-    method = clean_fill_method(method)
-    return _fill_methods[method]
+def get_fill_func(method: str) -> Callable:
+    method_cleaned = clean_fill_method(method)
+    assert isinstance(method_cleaned, str)
+    return _fill_methods[method_cleaned]
 
 
-def clean_reindex_fill_method(method):
+def clean_reindex_fill_method(method: str) -> Optional[str]:
     return clean_fill_method(method, allow_nearest=True)
 
 
-def _interp_limit(invalid, fw_limit, bw_limit):
+def _interp_limit(
+    invalid: np.ndarray, fw_limit: Optional[int], bw_limit: Optional[int]
+) -> Set[IndexLabel]:
     """
     Get indexers of values that won't be filled
     because they exceed the limits.
@@ -773,7 +837,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):
     f_idx = set()
     b_idx = set()
 
-    def inner(invalid, limit):
+    def inner(invalid: np.ndarray, limit: int) -> Set[IndexLabel]:
         limit = min(limit, N)
         windowed = _rolling_window(invalid, limit + 1).all(1)
         idx = set(np.where(windowed)[0] + limit) | set(
@@ -803,7 +867,7 @@ def inner(invalid, limit):
     return f_idx & b_idx
 
 
-def _rolling_window(a: np.ndarray, window: int):
+def _rolling_window(a: np.ndarray, window: int) -> np.ndarray:
     """
     [True, True, False, True, False], 2 ->