diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c1b587ce3a6b2..011f9c2a680cb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -112,16 +112,19 @@ # --------------- # def _ensure_data(values: ArrayLike) -> np.ndarray: """ - routine to ensure that our data is of the correct - input dtype for lower-level routines + Ensure values is of the correct input dtype for lower-level routines. This will coerce: - ints -> int64 - uint -> uint64 - - bool -> uint64 (TODO this should be uint8) + - bool -> uint8 - datetimelike -> i8 - datetime64tz -> i8 (in local tz) - categorical -> codes + - categorical[bool] without nulls -> uint8 + - categorical[bool] with nulls -> ValueError: cannot convert float NaN to integer + - boolean without nulls -> uint8 + - boolean with nulls -> object Parameters ---------- @@ -165,10 +168,8 @@ def _ensure_data(values: ArrayLike) -> np.ndarray: return np.asarray(values) elif is_complex_dtype(values.dtype): - # Incompatible return value type (got "Tuple[Union[Any, ExtensionArray, - # ndarray[Any, Any]], Union[Any, ExtensionDtype]]", expected - # "Tuple[ndarray[Any, Any], Union[dtype[Any], ExtensionDtype]]") - return values # type: ignore[return-value] + assert isinstance(values, np.ndarray) # for mypy + return values # datetimelike elif needs_i8_conversion(values.dtype): @@ -1723,11 +1724,7 @@ def safe_sort( if not isinstance(values, (np.ndarray, ABCExtensionArray)): # don't convert to string types dtype, _ = infer_dtype_from_array(values) - # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any], - # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, - # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], - # _DTypeDict, Tuple[Any, Any]]]" - values = np.asarray(values, dtype=dtype) # type: ignore[arg-type] + values = np.asarray(values, dtype=dtype) sorter = None diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c0ac9098ec7fc..6b4fe68d65a8b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -14,6 +14,7 @@ from typing import ( TYPE_CHECKING, Any, + Literal, Sized, TypeVar, cast, @@ -795,6 +796,25 @@ def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: return {maybe_box_datetimelike(key): value for key, value in d.items()} +@overload +def infer_dtype_from_array( + arr, +) -> tuple[np.dtype, ArrayLike]: + ... + + +@overload +def infer_dtype_from_array( + arr, pandas_dtype: Literal[False] = ... +) -> tuple[np.dtype, ArrayLike]: + ... + + +@overload +def infer_dtype_from_array(arr, pandas_dtype: bool = ...) -> tuple[DtypeObj, ArrayLike]: + ... + + def infer_dtype_from_array( arr, pandas_dtype: bool = False ) -> tuple[DtypeObj, ArrayLike]: