-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: pandas/core/missing.py #38339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
TYP: pandas/core/missing.py #38339
Changes from all commits
Commits
Show all changes
31 commits
Select commit
Hold shift + click to select a range
830fa00
add type hints
arw2019 605dc3c
review: remove assert
arw2019 e77c940
merge master
arw2019 f2d5ec4
typo
arw2019 e83904f
add isna check
arw2019 71caeeb
better error msg when interp method not string
arw2019 8fbbd47
improve docstring
arw2019 4474ada
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 575c227
remove Optional
arw2019 b19896b
use Axis TypeVar
arw2019 5036ee1
more hints
arw2019 c0c4338
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 2a31823
review comments
arw2019 4fb893b
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 95a734b
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 4aeec70
review comment
arw2019 d67977d
review comment: values_to_mask
arw2019 24f418a
review comments: mask_missing/infer_dtype_from_array
arw2019 aeb0b82
typo
arw2019 25d0051
typo
arw2019 bbd25ed
review comment
arw2019 785d27c
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 c2d6467
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 b505de5
review comment
arw2019 e39c152
docstring fix
arw2019 cb82c9a
review comments
arw2019 65effed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 2fa64bd
Merge branch 'master' of https://github.com/pandas-dev/pandas into ty…
arw2019 a54a02f
merge master
arw2019 315822c
TYP: infer_dtype_from_array
arw2019 df4b70a
minimize diff
arw2019 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,9 +7,12 @@ | |
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
Callable, | ||
List, | ||
Optional, | ||
Sequence, | ||
Set, | ||
Tuple, | ||
Union, | ||
) | ||
|
||
|
@@ -20,9 +23,11 @@ | |
lib, | ||
) | ||
from pandas._typing import ( | ||
AnyArrayLike, | ||
ArrayLike, | ||
Axis, | ||
DtypeObj, | ||
IndexLabel, | ||
Scalar, | ||
) | ||
from pandas.compat._optional import import_optional_dependency | ||
|
||
|
@@ -39,7 +44,9 @@ | |
from pandas import Index | ||
|
||
|
||
def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: | ||
def mask_missing( | ||
arr: AnyArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]] | ||
) -> np.ndarray: | ||
""" | ||
Return a masking array of same size/shape as arr | ||
with entries equaling any member of values_to_mask set to True | ||
|
@@ -77,7 +84,9 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: | |
return mask | ||
|
||
|
||
def clean_fill_method(method, allow_nearest: bool = False): | ||
def clean_fill_method( | ||
method: Optional[str], allow_nearest: bool = False | ||
) -> Optional[str]: | ||
# asfreq is compat for resampling | ||
if method in [None, "asfreq"]: | ||
return None | ||
|
@@ -136,7 +145,7 @@ def clean_interp_method(method: str, **kwargs) -> str: | |
return method | ||
|
||
|
||
def find_valid_index(values, how: str): | ||
def find_valid_index(values: ArrayLike, how: str) -> Optional[int]: | ||
""" | ||
Retrieves the index of the first valid value. | ||
|
||
|
@@ -176,15 +185,15 @@ def find_valid_index(values, how: str): | |
def interpolate_1d( | ||
xvalues: Index, | ||
yvalues: np.ndarray, | ||
method: Optional[str] = "linear", | ||
method: str = "linear", | ||
limit: Optional[int] = None, | ||
limit_direction: str = "forward", | ||
limit_area: Optional[str] = None, | ||
fill_value: Optional[Any] = None, | ||
bounds_error: bool = False, | ||
order: Optional[int] = None, | ||
**kwargs, | ||
): | ||
) -> np.ndarray: | ||
""" | ||
Logic for the 1-d interpolation. The result should be 1-d, inputs | ||
xvalues and yvalues will each be 1-d arrays of the same length. | ||
|
@@ -234,8 +243,13 @@ def interpolate_1d( | |
|
||
# These are sets of index pointers to invalid values... i.e. {0, 1, etc... | ||
all_nans = set(np.flatnonzero(invalid)) | ||
start_nans = set(range(find_valid_index(yvalues, "first"))) | ||
end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) | ||
|
||
start_nan_idx = find_valid_index(yvalues, "first") | ||
start_nans = set() if start_nan_idx is None else set(range(start_nan_idx)) | ||
|
||
end_nan_idx = find_valid_index(yvalues, "last") | ||
end_nans = set() if end_nan_idx is None else set(range(1 + end_nan_idx, len(valid))) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this fixing a bug in the case where end_nan_idx is None? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think so, because we special-case all nans/no nans at the top of the method |
||
|
||
mid_nans = all_nans - start_nans - end_nans | ||
|
||
# Like the sets above, preserve_nans contains indices of invalid values, | ||
|
@@ -308,8 +322,15 @@ def interpolate_1d( | |
|
||
|
||
def _interpolate_scipy_wrapper( | ||
x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs | ||
): | ||
x: np.ndarray, | ||
y: np.ndarray, | ||
new_x: Union[Scalar, np.ndarray], | ||
method: str, | ||
fill_value: Optional[Scalar] = None, | ||
bounds_error: bool = False, | ||
order: Optional[int] = None, | ||
**kwargs, | ||
) -> np.ndarray: | ||
""" | ||
Passed off to scipy.interpolate.interp1d. method is scipy's kind. | ||
Returns an array interpolated at new_x. Add any new methods to | ||
|
@@ -349,15 +370,14 @@ def _interpolate_scipy_wrapper( | |
"polynomial", | ||
] | ||
if method in interp1d_methods: | ||
if method == "polynomial": | ||
method = order | ||
kind = order if method == "polynomial" else method | ||
terp = interpolate.interp1d( | ||
x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error | ||
x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error | ||
) | ||
new_y = terp(new_x) | ||
elif method == "spline": | ||
# GH #10633, #24014 | ||
if isna(order) or (order <= 0): | ||
if order is None or isna(order) or order <= 0: | ||
raise ValueError( | ||
f"order needs to be specified and greater than 0; got order: {order}" | ||
) | ||
|
@@ -372,12 +392,23 @@ def _interpolate_scipy_wrapper( | |
y = y.copy() | ||
if not new_x.flags.writeable: | ||
new_x = new_x.copy() | ||
method = alt_methods[method] | ||
new_y = method(x, y, new_x, **kwargs) | ||
|
||
if isinstance(method, str): | ||
alt_method = alt_methods[method] | ||
new_y = alt_method(x, y, new_x, **kwargs) | ||
else: | ||
raise ValueError(f"{method} is not a valid interp method") | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return new_y | ||
|
||
|
||
def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): | ||
def _from_derivatives( | ||
xi: np.ndarray, | ||
yi: np.ndarray, | ||
x: np.ndarray, | ||
order: Optional[Union[int, List[int]]] = None, | ||
der: Union[int, List[int]] = 0, | ||
extrapolate: bool = False, | ||
) -> np.ndarray: | ||
""" | ||
Convenience function for interpolate.BPoly.from_derivatives. | ||
|
||
|
@@ -390,15 +421,16 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): | |
sorted 1D array of x-coordinates | ||
yi : array_like or list of array-likes | ||
yi[i][j] is the j-th derivative known at xi[i] | ||
order: None or int or array_like of ints. Default: None. | ||
x : scalar or array_like | ||
order: None or int or array_like of ints, default: None | ||
Specifies the degree of local polynomials. If not None, some | ||
derivatives are ignored. | ||
der : int or list | ||
der : int or list, default: 0 | ||
How many derivatives to extract; None for all potentially nonzero | ||
derivatives (that is a number equal to the number of points), or a | ||
list of derivatives to extract. This number includes the function | ||
value as 0th derivative. | ||
extrapolate : bool, optional | ||
extrapolate : bool, default False | ||
Whether to extrapolate to out-of-bounds points based on first and last | ||
intervals, or to return NaNs. Default: False. | ||
|
||
|
@@ -420,7 +452,13 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): | |
return m(x) | ||
|
||
|
||
def _akima_interpolate(xi, yi, x, der=0, axis=0): | ||
def _akima_interpolate( | ||
xi: np.ndarray, | ||
yi: np.ndarray, | ||
x: np.ndarray, | ||
der: int = 0, | ||
axis: int = 0, | ||
) -> Union[Scalar, ArrayLike]: | ||
""" | ||
Convenience function for akima interpolation. | ||
xi and yi are arrays of values used to approximate some function f, | ||
|
@@ -430,13 +468,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): | |
|
||
Parameters | ||
---------- | ||
xi : array_like | ||
xi : np.ndarray | ||
A sorted list of x-coordinates, of length N. | ||
yi : array_like | ||
yi : np.ndarray | ||
A 1-D array of real values. `yi`'s length along the interpolation | ||
axis must be equal to the length of `xi`. If N-D array, use axis | ||
parameter to select correct axis. | ||
x : scalar or array_like | ||
x : array_like | ||
Of length M. | ||
der : int, optional | ||
How many derivatives to extract; None for all potentially | ||
|
@@ -463,7 +501,14 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): | |
return P(x, nu=der) | ||
|
||
|
||
def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): | ||
def _cubicspline_interpolate( | ||
xi: np.ndarray, | ||
yi: np.ndarray, | ||
x: np.ndarray, | ||
axis: int = 0, | ||
bc_type: Union[str, Tuple] = "not-a-knot", | ||
extrapolate: Optional[Union[bool, str]] = None, | ||
) -> Union[ArrayLike, Scalar]: | ||
""" | ||
Convenience function for cubic spline data interpolator. | ||
|
||
|
@@ -478,7 +523,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat | |
Array containing values of the dependent variable. It can have | ||
arbitrary number of dimensions, but the length along ``axis`` | ||
(see below) must match the length of ``x``. Values must be finite. | ||
x : scalar or array_like, shape (m,) | ||
x : array_like, shape (m,) | ||
axis : int, optional | ||
Axis along which `y` is assumed to be varying. Meaning that for | ||
``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. | ||
|
@@ -571,6 +616,8 @@ def _interpolate_with_limit_area( | |
first = find_valid_index(values, "first") | ||
last = find_valid_index(values, "last") | ||
|
||
assert first is not None and last is not None | ||
|
||
values = interpolate_2d( | ||
values, | ||
method=method, | ||
|
@@ -588,12 +635,12 @@ def _interpolate_with_limit_area( | |
|
||
|
||
def interpolate_2d( | ||
values, | ||
values: np.ndarray, | ||
method: str = "pad", | ||
axis: Axis = 0, | ||
axis: int = 0, | ||
limit: Optional[int] = None, | ||
limit_area: Optional[str] = None, | ||
): | ||
) -> np.ndarray: | ||
""" | ||
Perform an actual interpolation of values; values will be made 2-d if | ||
needed. Fills inplace, returns the result. | ||
|
@@ -639,7 +686,10 @@ def interpolate_2d( | |
raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") | ||
values = values.reshape(tuple((1,) + values.shape)) | ||
|
||
method = clean_fill_method(method) | ||
method_cleaned = clean_fill_method(method) | ||
assert isinstance(method_cleaned, str) | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
method = method_cleaned | ||
|
||
tvalues = transf(values) | ||
if method == "pad": | ||
result = _pad_2d(tvalues, limit=limit) | ||
|
@@ -658,7 +708,9 @@ def interpolate_2d( | |
return result | ||
|
||
|
||
def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): | ||
def _cast_values_for_fillna( | ||
values: ArrayLike, dtype: DtypeObj, has_mask: bool | ||
) -> ArrayLike: | ||
""" | ||
Cast values to a dtype that algos.pad and algos.backfill can handle. | ||
""" | ||
|
@@ -677,34 +729,41 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): | |
return values | ||
|
||
|
||
def _fillna_prep(values, mask=None): | ||
def _fillna_prep( | ||
values: np.ndarray, mask: Optional[np.ndarray] = None | ||
) -> Tuple[np.ndarray, np.ndarray]: | ||
# boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d | ||
dtype = values.dtype | ||
|
||
has_mask = mask is not None | ||
if not has_mask: | ||
# This needs to occur before datetime/timedeltas are cast to int64 | ||
mask = isna(values) | ||
|
||
values = _cast_values_for_fillna(values, dtype, has_mask) | ||
# This needs to occur before datetime/timedeltas are cast to int64 | ||
mask = isna(values) if mask is None else mask | ||
|
||
values = _cast_values_for_fillna(values, values.dtype, has_mask) | ||
mask = mask.view(np.uint8) | ||
|
||
return values, mask | ||
|
||
|
||
def _pad_1d(values, limit=None, mask=None): | ||
def _pad_1d( | ||
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None | ||
) -> np.ndarray: | ||
values, mask = _fillna_prep(values, mask) | ||
algos.pad_inplace(values, mask, limit=limit) | ||
return values | ||
|
||
|
||
def _backfill_1d(values, limit=None, mask=None): | ||
def _backfill_1d( | ||
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None | ||
) -> np.ndarray: | ||
values, mask = _fillna_prep(values, mask) | ||
algos.backfill_inplace(values, mask, limit=limit) | ||
return values | ||
|
||
|
||
def _pad_2d(values, limit=None, mask=None): | ||
def _pad_2d( | ||
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None | ||
) -> np.ndarray: | ||
values, mask = _fillna_prep(values, mask) | ||
|
||
if np.all(values.shape): | ||
|
@@ -715,7 +774,9 @@ def _pad_2d(values, limit=None, mask=None): | |
return values | ||
|
||
|
||
def _backfill_2d(values, limit=None, mask=None): | ||
def _backfill_2d( | ||
values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None | ||
) -> np.ndarray: | ||
values, mask = _fillna_prep(values, mask) | ||
|
||
if np.all(values.shape): | ||
|
@@ -729,16 +790,19 @@ def _backfill_2d(values, limit=None, mask=None): | |
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} | ||
|
||
|
||
def get_fill_func(method): | ||
method = clean_fill_method(method) | ||
return _fill_methods[method] | ||
def get_fill_func(method: str) -> Callable: | ||
method_cleaned = clean_fill_method(method) | ||
assert isinstance(method_cleaned, str) | ||
return _fill_methods[method_cleaned] | ||
|
||
|
||
def clean_reindex_fill_method(method): | ||
def clean_reindex_fill_method(method: str) -> Optional[str]: | ||
return clean_fill_method(method, allow_nearest=True) | ||
|
||
|
||
def _interp_limit(invalid, fw_limit, bw_limit): | ||
def _interp_limit( | ||
invalid: np.ndarray, fw_limit: Optional[int], bw_limit: Optional[int] | ||
) -> Set[IndexLabel]: | ||
""" | ||
Get indexers of values that won't be filled | ||
because they exceed the limits. | ||
|
@@ -773,7 +837,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): | |
f_idx = set() | ||
b_idx = set() | ||
|
||
def inner(invalid, limit): | ||
def inner(invalid: np.ndarray, limit: int) -> Set[IndexLabel]: | ||
limit = min(limit, N) | ||
windowed = _rolling_window(invalid, limit + 1).all(1) | ||
idx = set(np.where(windowed)[0] + limit) | set( | ||
|
@@ -803,7 +867,7 @@ def inner(invalid, limit): | |
return f_idx & b_idx | ||
|
||
|
||
def _rolling_window(a: np.ndarray, window: int): | ||
def _rolling_window(a: np.ndarray, window: int) -> np.ndarray: | ||
""" | ||
[True, True, False, True, False], 2 -> | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the
ArrayLike
for arr
was correct here? (or if not, the docstring needs to be updated as well)