Skip to content

TYP: pandas/core/missing.py #38339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
830fa00
add type hints
arw2019 Dec 5, 2020
605dc3c
review: remove assert
arw2019 Dec 7, 2020
e77c940
merge master
arw2019 Dec 7, 2020
f2d5ec4
typo
arw2019 Dec 7, 2020
e83904f
add isna check
arw2019 Dec 7, 2020
71caeeb
better error msg when interp method not string
arw2019 Dec 7, 2020
8fbbd47
improve docstring
arw2019 Dec 7, 2020
4474ada
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 7, 2020
575c227
remove Optional
arw2019 Dec 7, 2020
b19896b
use Axis TypeVar
arw2019 Dec 7, 2020
5036ee1
more hints
arw2019 Dec 8, 2020
c0c4338
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 8, 2020
2a31823
review comments
arw2019 Dec 9, 2020
4fb893b
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 9, 2020
95a734b
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 10, 2020
4aeec70
review comment
arw2019 Dec 10, 2020
d67977d
review comment: values_to_mask
arw2019 Dec 10, 2020
24f418a
review comments: mask_missing/infer_dtype_from_array
arw2019 Dec 11, 2020
aeb0b82
typo
arw2019 Dec 11, 2020
25d0051
typo
arw2019 Dec 11, 2020
bbd25ed
review comment
arw2019 Dec 11, 2020
785d27c
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 11, 2020
c2d6467
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 14, 2020
b505de5
review comment
arw2019 Dec 14, 2020
e39c152
docstring fix
arw2019 Dec 14, 2020
cb82c9a
review comments
arw2019 Dec 14, 2020
65effed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Dec 15, 2020
2fa64bd
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 Jan 5, 2021
a54a02f
merge master
arw2019 Feb 21, 2021
315822c
TYP: infer_dtype_from_array
arw2019 Feb 21, 2021
df4b70a
minimize diff
arw2019 Feb 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,8 +849,8 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:


def infer_dtype_from_array(
arr, pandas_dtype: bool = False
) -> Tuple[DtypeObj, ArrayLike]:
arr: AnyArrayLike, pandas_dtype: bool = False
) -> Tuple[DtypeObj, Union[ArrayLike, Series]]:
"""
Infer the dtype from an array.

Expand Down
160 changes: 112 additions & 48 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
List,
Optional,
Sequence,
Set,
Tuple,
Union,
)

Expand All @@ -20,9 +23,11 @@
lib,
)
from pandas._typing import (
AnyArrayLike,
ArrayLike,
Axis,
DtypeObj,
IndexLabel,
Scalar,
)
from pandas.compat._optional import import_optional_dependency

Expand All @@ -39,7 +44,9 @@
from pandas import Index


def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
def mask_missing(
arr: AnyArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the ArrayLike for arr was correct here?
(or if not, the docstring needs to be updated as well)

) -> np.ndarray:
"""
Return a masking array of same size/shape as arr
with entries equaling any member of values_to_mask set to True
Expand Down Expand Up @@ -77,7 +84,9 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
return mask


def clean_fill_method(method, allow_nearest: bool = False):
def clean_fill_method(
method: Optional[str], allow_nearest: bool = False
) -> Optional[str]:
# asfreq is compat for resampling
if method in [None, "asfreq"]:
return None
Expand Down Expand Up @@ -136,7 +145,7 @@ def clean_interp_method(method: str, **kwargs) -> str:
return method


def find_valid_index(values, how: str):
def find_valid_index(values: ArrayLike, how: str) -> Optional[int]:
"""
Retrieves the index of the first valid value.

Expand Down Expand Up @@ -176,15 +185,15 @@ def find_valid_index(values, how: str):
def interpolate_1d(
xvalues: Index,
yvalues: np.ndarray,
method: Optional[str] = "linear",
method: str = "linear",
limit: Optional[int] = None,
limit_direction: str = "forward",
limit_area: Optional[str] = None,
fill_value: Optional[Any] = None,
bounds_error: bool = False,
order: Optional[int] = None,
**kwargs,
):
) -> np.ndarray:
"""
Logic for the 1-d interpolation. The result should be 1-d, inputs
xvalues and yvalues will each be 1-d arrays of the same length.
Expand Down Expand Up @@ -234,8 +243,13 @@ def interpolate_1d(

# These are sets of index pointers to invalid values... i.e. {0, 1, etc...
all_nans = set(np.flatnonzero(invalid))
start_nans = set(range(find_valid_index(yvalues, "first")))
end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))

start_nan_idx = find_valid_index(yvalues, "first")
start_nans = set() if start_nan_idx is None else set(range(start_nan_idx))

end_nan_idx = find_valid_index(yvalues, "last")
end_nans = set() if end_nan_idx is None else set(range(1 + end_nan_idx, len(valid)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this fixing a bug in the case where end_nan_idx is None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, because we special-case all-NaN/no-NaN inputs at the top of the method


mid_nans = all_nans - start_nans - end_nans

# Like the sets above, preserve_nans contains indices of invalid values,
Expand Down Expand Up @@ -308,8 +322,15 @@ def interpolate_1d(


def _interpolate_scipy_wrapper(
x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs
):
x: np.ndarray,
y: np.ndarray,
new_x: Union[Scalar, np.ndarray],
method: str,
fill_value: Optional[Scalar] = None,
bounds_error: bool = False,
order: Optional[int] = None,
**kwargs,
) -> np.ndarray:
"""
Passed off to scipy.interpolate.interp1d. method is scipy's kind.
Returns an array interpolated at new_x. Add any new methods to
Expand Down Expand Up @@ -349,15 +370,14 @@ def _interpolate_scipy_wrapper(
"polynomial",
]
if method in interp1d_methods:
if method == "polynomial":
method = order
kind = order if method == "polynomial" else method
terp = interpolate.interp1d(
x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
)
new_y = terp(new_x)
elif method == "spline":
# GH #10633, #24014
if isna(order) or (order <= 0):
if order is None or isna(order) or order <= 0:
raise ValueError(
f"order needs to be specified and greater than 0; got order: {order}"
)
Expand All @@ -372,12 +392,23 @@ def _interpolate_scipy_wrapper(
y = y.copy()
if not new_x.flags.writeable:
new_x = new_x.copy()
method = alt_methods[method]
new_y = method(x, y, new_x, **kwargs)

if isinstance(method, str):
alt_method = alt_methods[method]
new_y = alt_method(x, y, new_x, **kwargs)
else:
raise ValueError(f"{method} is not a valid interp method")
return new_y


def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
def _from_derivatives(
xi: np.ndarray,
yi: np.ndarray,
x: np.ndarray,
order: Optional[Union[int, List[int]]] = None,
der: Union[int, List[int]] = 0,
extrapolate: bool = False,
) -> np.ndarray:
"""
Convenience function for interpolate.BPoly.from_derivatives.

Expand All @@ -390,15 +421,16 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
sorted 1D array of x-coordinates
yi : array_like or list of array-likes
yi[i][j] is the j-th derivative known at xi[i]
order: None or int or array_like of ints. Default: None.
x : scalar or array_like
order: None or int or array_like of ints, default: None
Specifies the degree of local polynomials. If not None, some
derivatives are ignored.
der : int or list
der : int or list, default: 0
How many derivatives to extract; None for all potentially nonzero
derivatives (that is a number equal to the number of points), or a
list of derivatives to extract. This number includes the function
value as 0th derivative.
extrapolate : bool, optional
extrapolate : bool, default False
Whether to extrapolate to ouf-of-bounds points based on first and last
intervals, or to return NaNs. Default: True.

Expand All @@ -420,7 +452,13 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
return m(x)


def _akima_interpolate(xi, yi, x, der=0, axis=0):
def _akima_interpolate(
xi: np.ndarray,
yi: np.ndarray,
x: np.ndarray,
der: int = 0,
axis: int = 0,
) -> Union[Scalar, ArrayLike]:
"""
Convenience function for akima interpolation.
xi and yi are arrays of values used to approximate some function f,
Expand All @@ -430,13 +468,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):

Parameters
----------
xi : array_like
xi : np.ndarray
A sorted list of x-coordinates, of length N.
yi : array_like
yi : np.ndarray
A 1-D array of real values. `yi`'s length along the interpolation
axis must be equal to the length of `xi`. If N-D array, use axis
parameter to select correct axis.
x : scalar or array_like
x : array_like
Of length M.
der : int, optional
How many derivatives to extract; None for all potentially
Expand All @@ -463,7 +501,14 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
return P(x, nu=der)


def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
def _cubicspline_interpolate(
xi: np.ndarray,
yi: np.ndarray,
x: np.ndarray,
axis: int = 0,
bc_type: Union[str, Tuple] = "not-a-knot",
extrapolate: Optional[Union[bool, str]] = None,
) -> Union[ArrayLike, Scalar]:
"""
Convenience function for cubic spline data interpolator.

Expand All @@ -478,7 +523,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
Array containing values of the dependent variable. It can have
arbitrary number of dimensions, but the length along ``axis``
(see below) must match the length of ``x``. Values must be finite.
x : scalar or array_like, shape (m,)
x : array_like, shape (m,)
axis : int, optional
Axis along which `y` is assumed to be varying. Meaning that for
``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
Expand Down Expand Up @@ -571,6 +616,8 @@ def _interpolate_with_limit_area(
first = find_valid_index(values, "first")
last = find_valid_index(values, "last")

assert first is not None and last is not None

values = interpolate_2d(
values,
method=method,
Expand All @@ -588,12 +635,12 @@ def _interpolate_with_limit_area(


def interpolate_2d(
values,
values: np.ndarray,
method: str = "pad",
axis: Axis = 0,
axis: int = 0,
limit: Optional[int] = None,
limit_area: Optional[str] = None,
):
) -> np.ndarray:
"""
Perform an actual interpolation of values, values will be make 2-d if
needed fills inplace, returns the result.
Expand Down Expand Up @@ -639,7 +686,10 @@ def interpolate_2d(
raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
values = values.reshape(tuple((1,) + values.shape))

method = clean_fill_method(method)
method_cleaned = clean_fill_method(method)
assert isinstance(method_cleaned, str)
method = method_cleaned

tvalues = transf(values)
if method == "pad":
result = _pad_2d(tvalues, limit=limit)
Expand All @@ -658,7 +708,9 @@ def interpolate_2d(
return result


def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
def _cast_values_for_fillna(
values: ArrayLike, dtype: DtypeObj, has_mask: bool
) -> ArrayLike:
"""
Cast values to a dtype that algos.pad and algos.backfill can handle.
"""
Expand All @@ -677,34 +729,41 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
return values


def _fillna_prep(
    values: np.ndarray, mask: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Shared boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d.

    Parameters
    ----------
    values : np.ndarray
        Array that the caller will fill.
    mask : np.ndarray, optional
        Boolean mask of missing entries; computed via isna(values) when
        not supplied.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        The (possibly cast) values and the mask viewed as uint8, the
        layout the low-level fill routines expect.
    """
    # Record whether the caller supplied a mask before we build one ourselves.
    has_mask = mask is not None

    # This needs to occur before datetime/timedeltas are cast to int64
    mask = isna(values) if mask is None else mask

    values = _cast_values_for_fillna(values, values.dtype, has_mask)

    # uint8 view is what the Cython pad/backfill implementations consume.
    mask = mask.view(np.uint8)
    return values, mask


def _pad_1d(
    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
) -> np.ndarray:
    """
    Forward-fill (pad) missing entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        1-d array to fill; modified in place and also returned.
    limit : int, optional
        Maximum number of consecutive missing values to fill.
    mask : np.ndarray, optional
        Precomputed boolean mask of missing entries.

    Returns
    -------
    np.ndarray
        The filled ``values`` array.
    """
    values, mask = _fillna_prep(values, mask)
    algos.pad_inplace(values, mask, limit=limit)
    return values


def _backfill_1d(
    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
) -> np.ndarray:
    """
    Backward-fill missing entries of a 1-d array in place.

    Parameters
    ----------
    values : np.ndarray
        1-d array to fill; modified in place and also returned.
    limit : int, optional
        Maximum number of consecutive missing values to fill.
    mask : np.ndarray, optional
        Precomputed boolean mask of missing entries.

    Returns
    -------
    np.ndarray
        The filled ``values`` array.
    """
    values, mask = _fillna_prep(values, mask)
    algos.backfill_inplace(values, mask, limit=limit)
    return values


def _pad_2d(values, limit=None, mask=None):
def _pad_2d(
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
) -> np.ndarray:
values, mask = _fillna_prep(values, mask)

if np.all(values.shape):
Expand All @@ -715,7 +774,9 @@ def _pad_2d(values, limit=None, mask=None):
return values


def _backfill_2d(values, limit=None, mask=None):
def _backfill_2d(
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
) -> np.ndarray:
values, mask = _fillna_prep(values, mask)

if np.all(values.shape):
Expand All @@ -729,16 +790,19 @@ def _backfill_2d(values, limit=None, mask=None):
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}


def get_fill_func(method: str) -> Callable:
    """
    Look up the 1-d fill routine for a fill method name.

    Parameters
    ----------
    method : str
        A fill method alias understood by ``clean_fill_method``
        (e.g. "pad"/"ffill" or "backfill"/"bfill").

    Returns
    -------
    Callable
        The matching entry from ``_fill_methods``.

    Raises
    ------
    ValueError
        If ``method`` is not a recognized fill method.
    """
    method_cleaned = clean_fill_method(method)
    # clean_fill_method returns None only for None/"asfreq" input; for a str
    # alias it returns a str.  The assert narrows Optional[str] for the
    # dict lookup below (type-checking aid, not input validation).
    assert isinstance(method_cleaned, str)
    return _fill_methods[method_cleaned]


def clean_reindex_fill_method(method: str) -> Optional[str]:
    """
    Normalize a reindex fill-method alias, additionally permitting "nearest".

    Delegates to ``clean_fill_method`` with ``allow_nearest=True``; returns
    None when no filling is requested.
    """
    return clean_fill_method(method, allow_nearest=True)


def _interp_limit(invalid, fw_limit, bw_limit):
def _interp_limit(
invalid: np.ndarray, fw_limit: Optional[int], bw_limit: Optional[int]
) -> Set[IndexLabel]:
"""
Get indexers of values that won't be filled
because they exceed the limits.
Expand Down Expand Up @@ -773,7 +837,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):
f_idx = set()
b_idx = set()

def inner(invalid, limit):
def inner(invalid: np.ndarray, limit: int) -> Set[IndexLabel]:
limit = min(limit, N)
windowed = _rolling_window(invalid, limit + 1).all(1)
idx = set(np.where(windowed)[0] + limit) | set(
Expand Down Expand Up @@ -803,7 +867,7 @@ def inner(invalid, limit):
return f_idx & b_idx


def _rolling_window(a: np.ndarray, window: int):
def _rolling_window(a: np.ndarray, window: int) -> np.ndarray:
"""
[True, True, False, True, False], 2 ->

Expand Down