diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 2de970466e19f..0408dfd83fedc 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -9,7 +9,6 @@
     Callable,
     Hashable,
     Literal,
-    TypeVar,
 )

 import numpy as np
@@ -93,9 +92,9 @@
         AxisInt,
         DtypeObj,
         QuantileInterpolation,
+        Self,
         npt,
     )

-T = TypeVar("T", bound="BaseArrayManager")


 class BaseArrayManager(DataManager):
@@ -131,7 +130,7 @@ def __init__(
     ) -> None:
         raise NotImplementedError

-    def make_empty(self: T, axes=None) -> T:
+    def make_empty(self, axes=None) -> Self:
         """Return an empty ArrayManager with the items axis of len 0 (no columns)"""
         if axes is None:
             axes = [self.axes[1:], Index([])]
@@ -195,11 +194,11 @@ def __repr__(self) -> str:
         return output

     def apply(
-        self: T,
+        self,
         f,
         align_keys: list[str] | None = None,
         **kwargs,
-    ) -> T:
+    ) -> Self:
         """
         Iterate over the arrays, collect and create a new ArrayManager.

@@ -257,8 +256,8 @@ def apply(
         return type(self)(result_arrays, new_axes)  # type: ignore[arg-type]

     def apply_with_block(
-        self: T, f, align_keys=None, swap_axis: bool = True, **kwargs
-    ) -> T:
+        self, f, align_keys=None, swap_axis: bool = True, **kwargs
+    ) -> Self:
         # switch axis to follow BlockManager logic
         if swap_axis and "axis" in kwargs and self.ndim == 2:
             kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0
@@ -311,7 +310,7 @@ def apply_with_block(

         return type(self)(result_arrays, self._axes)

-    def where(self: T, other, cond, align: bool) -> T:
+    def where(self, other, cond, align: bool) -> Self:
         if align:
             align_keys = ["other", "cond"]
         else:
@@ -325,13 +324,13 @@ def where(self: T, other, cond, align: bool) -> T:
             cond=cond,
         )

-    def round(self: T, decimals: int, using_cow: bool = False) -> T:
+    def round(self, decimals: int, using_cow: bool = False) -> Self:
         return self.apply_with_block("round", decimals=decimals, using_cow=using_cow)

-    def setitem(self: T, indexer, value) -> T:
+    def setitem(self, indexer, value) -> Self:
         return self.apply_with_block("setitem", indexer=indexer, value=value)

-    def putmask(self: T, mask, new, align: bool = True) -> T:
+    def putmask(self, mask, new, align: bool = True) -> Self:
         if align:
             align_keys = ["new", "mask"]
         else:
@@ -345,14 +344,14 @@ def putmask(self: T, mask, new, align: bool = True) -> T:
             new=new,
         )

-    def diff(self: T, n: int, axis: AxisInt) -> T:
+    def diff(self, n: int, axis: AxisInt) -> Self:
         assert self.ndim == 2 and axis == 0  # caller ensures
         return self.apply(algos.diff, n=n, axis=axis)

-    def interpolate(self: T, **kwargs) -> T:
+    def interpolate(self, **kwargs) -> Self:
         return self.apply_with_block("interpolate", swap_axis=False, **kwargs)

-    def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T:
+    def shift(self, periods: int, axis: AxisInt, fill_value) -> Self:
         if fill_value is lib.no_default:
             fill_value = None

@@ -364,7 +363,7 @@ def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T:
             "shift", periods=periods, axis=axis, fill_value=fill_value
         )

-    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+    def fillna(self, value, limit, inplace: bool, downcast) -> Self:
         if limit is not None:
             # Do this validation even if we go through one of the no-op paths
             limit = libalgos.validate_limit(None, limit=limit)
@@ -373,13 +372,13 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast
         )

-    def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
+    def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self:
         if copy is None:
             copy = True
         return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)

-    def convert(self: T, copy: bool | None) -> T:
+    def convert(self, copy: bool | None) -> Self:
         if copy is None:
             copy = True

@@ -402,10 +401,10 @@ def _convert(arr):

         return self.apply(_convert)

-    def replace_regex(self: T, **kwargs) -> T:
+    def replace_regex(self, **kwargs) -> Self:
         return self.apply_with_block("_replace_regex", **kwargs)

-    def replace(self: T, to_replace, value, inplace: bool) -> T:
+    def replace(self, to_replace, value, inplace: bool) -> Self:
         inplace = validate_bool_kwarg(inplace, "inplace")
         assert np.ndim(value) == 0, value
         # TODO "replace" is right now implemented on the blocks, we should move
@@ -415,12 +414,12 @@ def replace(self: T, to_replace, value, inplace: bool) -> T:
         )

     def replace_list(
-        self: T,
+        self,
         src_list: list[Any],
         dest_list: list[Any],
         inplace: bool = False,
         regex: bool = False,
-    ) -> T:
+    ) -> Self:
         """do a list replace"""
         inplace = validate_bool_kwarg(inplace, "inplace")

@@ -432,7 +431,7 @@ def replace_list(
             regex=regex,
         )

-    def to_native_types(self: T, **kwargs) -> T:
+    def to_native_types(self, **kwargs) -> Self:
         return self.apply(to_native_types, **kwargs)

     @property
@@ -458,7 +457,7 @@ def is_view(self) -> bool:
     def is_single_block(self) -> bool:
         return len(self.arrays) == 1

-    def _get_data_subset(self: T, predicate: Callable) -> T:
+    def _get_data_subset(self, predicate: Callable) -> Self:
         indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
         arrays = [self.arrays[i] for i in indices]
         # TODO copy?
@@ -469,7 +468,7 @@ def _get_data_subset(self: T, predicate: Callable) -> T:
         new_axes = [self._axes[0], new_cols]
         return type(self)(arrays, new_axes, verify_integrity=False)

-    def get_bool_data(self: T, copy: bool = False) -> T:
+    def get_bool_data(self, copy: bool = False) -> Self:
         """
         Select columns that are bool-dtype and object-dtype columns
         that are all-bool.
@@ -480,7 +479,7 @@ def get_bool_data(self: T, copy: bool = False) -> T:
         """
         return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))

-    def get_numeric_data(self: T, copy: bool = False) -> T:
+    def get_numeric_data(self, copy: bool = False) -> Self:
         """
         Select columns that have a numeric dtype.

@@ -494,7 +493,7 @@ def get_numeric_data(self: T, copy: bool = False) -> T:
             or getattr(arr.dtype, "_is_numeric", False)
         )

-    def copy(self: T, deep: bool | Literal["all"] | None = True) -> T:
+    def copy(self, deep: bool | Literal["all"] | None = True) -> Self:
         """
         Make deep or shallow copy of ArrayManager

@@ -531,7 +530,7 @@ def copy_func(ax):
         return type(self)(new_arrays, new_axes, verify_integrity=False)

     def reindex_indexer(
-        self: T,
+        self,
         new_axis,
         indexer,
         axis: AxisInt,
@@ -542,7 +541,7 @@ def reindex_indexer(
         only_slice: bool = False,
         # ArrayManager specific keywords
         use_na_proxy: bool = False,
-    ) -> T:
+    ) -> Self:
         axis = self._normalize_axis(axis)
         return self._reindex_indexer(
             new_axis,
@@ -555,7 +554,7 @@ def reindex_indexer(
         )

     def _reindex_indexer(
-        self: T,
+        self,
         new_axis,
         indexer: npt.NDArray[np.intp] | None,
         axis: AxisInt,
@@ -563,7 +562,7 @@ def _reindex_indexer(
         allow_dups: bool = False,
         copy: bool | None = True,
         use_na_proxy: bool = False,
-    ) -> T:
+    ) -> Self:
         """
         Parameters
         ----------
@@ -634,11 +633,11 @@ def _reindex_indexer(
         return type(self)(new_arrays, new_axes, verify_integrity=False)

     def take(
-        self: T,
+        self,
         indexer: npt.NDArray[np.intp],
         axis: AxisInt = 1,
         verify: bool = True,
-    ) -> T:
+    ) -> Self:
         """
         Take items along any axis.
         """
@@ -926,7 +925,7 @@ def idelete(self, indexer) -> ArrayManager:
     # --------------------------------------------------------------------
     # Array-wise Operation

-    def grouped_reduce(self: T, func: Callable) -> T:
+    def grouped_reduce(self, func: Callable) -> Self:
         """
         Apply grouped reduction function columnwise, returning a new ArrayManager.

@@ -965,7 +964,7 @@ def grouped_reduce(self: T, func: Callable) -> T:
         #  expected "List[Union[ndarray, ExtensionArray]]"
         return type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]

-    def reduce(self: T, func: Callable) -> T:
+    def reduce(self, func: Callable) -> Self:
         """
         Apply reduction function column-wise, returning a single-row ArrayManager.

@@ -1199,8 +1198,9 @@ def make_empty(self, axes=None) -> SingleArrayManager:
     def from_array(cls, array, index) -> SingleArrayManager:
         return cls([array], [index])

+    # error: Cannot override writeable attribute with read-only property
     @property
-    def axes(self):
+    def axes(self) -> list[Index]:  # type: ignore[override]
         return self._axes

     @property
@@ -1254,7 +1254,8 @@ def getitem_mgr(self, indexer) -> SingleArrayManager:
         new_index = self.index[indexer]
         return type(self)([new_array], [new_index])

-    def apply(self, func, **kwargs):
+    # error: Signature of "apply" incompatible with supertype "BaseArrayManager"
+    def apply(self, func, **kwargs) -> Self:  # type: ignore[override]
         if callable(func):
             new_array = func(self.array, **kwargs)
         else:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index e5f50bb35d6bd..2d2ef51c1d494 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -8,7 +8,6 @@
     Hashable,
     Literal,
     Sequence,
-    TypeVar,
     cast,
 )
 import warnings
@@ -84,11 +83,10 @@
         AxisInt,
         DtypeObj,
         QuantileInterpolation,
+        Self,
         Shape,
         npt,
-        type_t,
     )

-T = TypeVar("T", bound="BaseBlockManager")


 class BaseBlockManager(DataManager):
@@ -160,7 +158,7 @@ def __init__(self, blocks, axes, verify_integrity: bool = True) -> None:
         raise NotImplementedError

     @classmethod
-    def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T:
+    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> Self:
         raise NotImplementedError

     @property
@@ -190,7 +188,7 @@ def blklocs(self) -> npt.NDArray[np.intp]:

         return self._blklocs

-    def make_empty(self: T, axes=None) -> T:
+    def make_empty(self, axes=None) -> Self:
         """return an empty BlockManager with the items axis of len 0"""
         if axes is None:
             axes = [Index([])] + self.axes[1:]
@@ -303,11 +301,11 @@ def __repr__(self) -> str:
         return output

     def apply(
-        self: T,
+        self,
         f,
         align_keys: list[str] | None = None,
         **kwargs,
-    ) -> T:
+    ) -> Self:
         """
         Iterate over the blocks, collect and create a new BlockManager.

@@ -354,7 +352,7 @@ def apply(
         out = type(self).from_blocks(result_blocks, self.axes)
         return out

-    def where(self: T, other, cond, align: bool) -> T:
+    def where(self, other, cond, align: bool) -> Self:
         if align:
             align_keys = ["other", "cond"]
         else:
@@ -369,14 +367,14 @@ def where(self: T, other, cond, align: bool) -> T:
             using_cow=using_copy_on_write(),
         )

-    def round(self: T, decimals: int, using_cow: bool = False) -> T:
+    def round(self, decimals: int, using_cow: bool = False) -> Self:
         return self.apply(
             "round",
             decimals=decimals,
             using_cow=using_cow,
         )

-    def setitem(self: T, indexer, value) -> T:
+    def setitem(self, indexer, value) -> Self:
         """
         Set values with indexer.

@@ -392,7 +390,7 @@ def setitem(self: T, indexer, value) -> T:

         return self.apply("setitem", indexer=indexer, value=value)

-    def putmask(self, mask, new, align: bool = True):
+    def putmask(self, mask, new, align: bool = True) -> Self:
         if align:
             align_keys = ["new", "mask"]
         else:
@@ -407,24 +405,24 @@ def putmask(self, mask, new, align: bool = True):
             using_cow=using_copy_on_write(),
         )

-    def diff(self: T, n: int, axis: AxisInt) -> T:
+    def diff(self, n: int, axis: AxisInt) -> Self:
         # only reached with self.ndim == 2 and axis == 1
         axis = self._normalize_axis(axis)
         return self.apply("diff", n=n, axis=axis)

-    def interpolate(self: T, inplace: bool, **kwargs) -> T:
+    def interpolate(self, inplace: bool, **kwargs) -> Self:
         return self.apply(
             "interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write()
         )

-    def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T:
+    def shift(self, periods: int, axis: AxisInt, fill_value) -> Self:
         axis = self._normalize_axis(axis)
         if fill_value is lib.no_default:
             fill_value = None

         return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

-    def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
+    def fillna(self, value, limit, inplace: bool, downcast) -> Self:
         if limit is not None:
             # Do this validation even if we go through one of the no-op paths
             limit = libalgos.validate_limit(None, limit=limit)
@@ -438,7 +436,7 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
             using_cow=using_copy_on_write(),
         )

-    def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T:
+    def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self:
         if copy is None:
             if using_copy_on_write():
                 copy = False
@@ -455,7 +453,7 @@ def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") ->
             using_cow=using_copy_on_write(),
         )

-    def convert(self: T, copy: bool | None) -> T:
+    def convert(self, copy: bool | None) -> Self:
         if copy is None:
             if using_copy_on_write():
                 copy = False
@@ -466,7 +464,7 @@ def convert(self: T, copy: bool | None) -> T:

         return self.apply("convert", copy=copy, using_cow=using_copy_on_write())

-    def replace(self: T, to_replace, value, inplace: bool) -> T:
+    def replace(self, to_replace, value, inplace: bool) -> Self:
         inplace = validate_bool_kwarg(inplace, "inplace")
         # NDFrame.replace ensures the not-is_list_likes here
         assert not is_list_like(to_replace)
@@ -479,16 +477,16 @@ def replace(self: T, to_replace, value, inplace: bool) -> T:
             using_cow=using_copy_on_write(),
         )

-    def replace_regex(self, **kwargs):
+    def replace_regex(self, **kwargs) -> Self:
         return self.apply("_replace_regex", **kwargs, using_cow=using_copy_on_write())

     def replace_list(
-        self: T,
+        self,
         src_list: list[Any],
         dest_list: list[Any],
         inplace: bool = False,
         regex: bool = False,
-    ) -> T:
+    ) -> Self:
         """do a list replace"""
         inplace = validate_bool_kwarg(inplace, "inplace")

@@ -503,7 +501,7 @@ def replace_list(
         bm._consolidate_inplace()
         return bm

-    def to_native_types(self: T, **kwargs) -> T:
+    def to_native_types(self, **kwargs) -> Self:
         """
         Convert values to native types (strings / python objects) that are used
         in formatting (repr / csv).
@@ -534,11 +532,11 @@ def is_view(self) -> bool:

         return False

-    def _get_data_subset(self: T, predicate: Callable) -> T:
+    def _get_data_subset(self, predicate: Callable) -> Self:
         blocks = [blk for blk in self.blocks if predicate(blk.values)]
         return self._combine(blocks, copy=False)

-    def get_bool_data(self: T, copy: bool = False) -> T:
+    def get_bool_data(self, copy: bool = False) -> Self:
         """
         Select blocks that are bool-dtype and columns from object-dtype blocks
         that are all-bool.
@@ -563,7 +561,7 @@ def get_bool_data(self: T, copy: bool = False) -> T:

         return self._combine(new_blocks, copy)

-    def get_numeric_data(self: T, copy: bool = False) -> T:
+    def get_numeric_data(self, copy: bool = False) -> Self:
         """
         Parameters
         ----------
@@ -579,8 +577,8 @@ def get_numeric_data(self: T, copy: bool = False) -> T:
         return self._combine(numeric_blocks, copy)

     def _combine(
-        self: T, blocks: list[Block], copy: bool = True, index: Index | None = None
-    ) -> T:
+        self, blocks: list[Block], copy: bool = True, index: Index | None = None
+    ) -> Self:
         """return a new manager with the blocks"""
         if len(blocks) == 0:
             if self.ndim == 2:
@@ -616,7 +614,7 @@ def _combine(
     def nblocks(self) -> int:
         return len(self.blocks)

-    def copy(self: T, deep: bool | None | Literal["all"] = True) -> T:
+    def copy(self, deep: bool | None | Literal["all"] = True) -> Self:
         """
         Make deep or shallow copy of BlockManager

@@ -663,7 +661,7 @@ def copy_func(ax):
         res._consolidate_inplace()
         return res

-    def consolidate(self: T) -> T:
+    def consolidate(self) -> Self:
         """
         Join together blocks having same dtype

@@ -680,7 +678,7 @@ def consolidate(self: T) -> T:
         return bm

     def reindex_indexer(
-        self: T,
+        self,
         new_axis: Index,
         indexer: npt.NDArray[np.intp] | None,
         axis: AxisInt,
@@ -690,7 +688,7 @@ def reindex_indexer(
         only_slice: bool = False,
         *,
         use_na_proxy: bool = False,
-    ) -> T:
+    ) -> Self:
         """
         Parameters
         ----------
@@ -926,11 +924,11 @@ def _make_na_block(
         return new_block_2d(block_values, placement=placement)

     def take(
-        self: T,
+        self,
         indexer: npt.NDArray[np.intp],
         axis: AxisInt = 1,
         verify: bool = True,
-    ) -> T:
+    ) -> Self:
         """
         Take items along any axis.

@@ -1006,7 +1004,7 @@ def _verify_integrity(self) -> None:
             )

     @classmethod
-    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager:
+    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> Self:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
@@ -1472,7 +1470,7 @@ def idelete(self, indexer) -> BlockManager:
     # ----------------------------------------------------------------
     # Block-wise Operation

-    def grouped_reduce(self: T, func: Callable) -> T:
+    def grouped_reduce(self, func: Callable) -> Self:
         """
         Apply grouped reduction function blockwise, returning a new BlockManager.

@@ -1505,7 +1503,7 @@ def grouped_reduce(self: T, func: Callable) -> T:

         return type(self).from_blocks(result_blocks, [self.axes[0], index])

-    def reduce(self: T, func: Callable) -> T:
+    def reduce(self, func: Callable) -> Self:
         """
         Apply reduction function blockwise, returning a single-row BlockManager.

@@ -1543,12 +1541,12 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool:
         return blockwise_all(self, other, array_equals)

     def quantile(
-        self: T,
+        self,
         *,
         qs: Index,  # with dtype float 64
         axis: AxisInt = 0,
         interpolation: QuantileInterpolation = "linear",
-    ) -> T:
+    ) -> Self:
         """
         Iterate over blocks applying quantile reduction.
         This routine is intended for reduction type operations and
@@ -1645,7 +1643,7 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
         bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
         return bm

-    def to_dict(self, copy: bool = True):
+    def to_dict(self, copy: bool = True) -> dict[str, Self]:
         """
         Return a dict of str(dtype) -> BlockManager

@@ -1853,7 +1851,7 @@ def from_blocks(
         cls,
         blocks: list[Block],
         axes: list[Index],
-    ) -> SingleBlockManager:
+    ) -> Self:
         """
         Constructor for BlockManager and SingleBlockManager with same signature.
         """
@@ -2005,7 +2003,7 @@ def array_values(self):
         """The array that Series.array returns"""
         return self._block.array_values

-    def get_numeric_data(self, copy: bool = False):
+    def get_numeric_data(self, copy: bool = False) -> Self:
         if self._block.is_numeric:
             return self.copy(deep=copy)
         return self.make_empty()
@@ -2062,7 +2060,7 @@ def set_values(self, values: ArrayLike) -> None:
         self.blocks[0].values = values
         self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values)))

-    def _equal_values(self: T, other: T) -> bool:
+    def _equal_values(self, other: Self) -> bool:
         """
         Used in .equals defined in base class.
         Only check the column values assuming shape and indexes have already been checked.
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 1b020a3d96411..1c4727fda4e64 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2611,7 +2611,7 @@ def extract(
         return self._wrap_result(result, name=name)

     @forbid_nonstring_types(["bytes"])
-    def extractall(self, pat, flags: int = 0):
+    def extractall(self, pat, flags: int = 0) -> DataFrame:
         r"""
         Extract capture groups in the regex `pat` as columns in DataFrame.

@@ -3295,7 +3295,7 @@ def _get_group_names(regex: re.Pattern) -> list[Hashable]:
     return [names.get(1 + i, i) for i in range(regex.groups)]


-def str_extractall(arr, pat, flags: int = 0):
+def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
     regex = re.compile(pat, flags=flags)
     # the regex must contain capture groups.
     if regex.groups == 0:
diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py
index c3a87e04c1158..98a8697740266 100644
--- a/pandas/io/formats/css.py
+++ b/pandas/io/formats/css.py
@@ -334,7 +334,7 @@ def _update_other_units(self, props: dict[str, str]) -> dict[str, str]:
         )
         return props

-    def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS):
+    def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS) -> str:
         def _error():
             warnings.warn(
                 f"Unhandled size: {repr(in_val)}",
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 58a8482fd3988..9e4d8c28f33ab 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -119,6 +119,7 @@
         AxisInt,
         DtypeArg,
         FilePath,
+        Self,
         Shape,
         npt,
     )
@@ -631,7 +632,7 @@ def __repr__(self) -> str:
         pstr = pprint_thing(self._path)
         return f"{type(self)}\nFile path: {pstr}\n"

-    def __enter__(self) -> HDFStore:
+    def __enter__(self) -> Self:
         return self

     def __exit__(
@@ -3399,7 +3400,7 @@ def description(self):
         return self.table.description

     @property
-    def axes(self):
+    def axes(self) -> itertools.chain[IndexCol]:
         return itertools.chain(self.index_axes, self.values_axes)

     @property
@@ -3694,7 +3695,7 @@ def create_index(

     def _read_axes(
         self, where, start: int | None = None, stop: int | None = None
-    ) -> list[tuple[ArrayLike, ArrayLike]]:
+    ) -> list[tuple[np.ndarray, np.ndarray] | tuple[Index, Index]]:
         """
         Create the axes sniffed from the table.

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 0400d4c7e2d07..1eb24f4a6c375 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -72,9 +72,9 @@
         DtypeArg,
         DtypeBackend,
         IndexLabel,
+        Self,
     )

-
 # -----------------------------------------------------------------------------
 # -- Helper functions
@@ -329,6 +329,7 @@ def read_sql_table(

     if dtype_backend is lib.no_default:
         dtype_backend = "numpy"  # type: ignore[assignment]
+    assert dtype_backend is not lib.no_default

     with pandasSQL_builder(con, schema=schema, need_transaction=True) as pandas_sql:
         if not pandas_sql.has_table(table_name):
@@ -460,6 +461,7 @@ def read_sql_query(

     if dtype_backend is lib.no_default:
         dtype_backend = "numpy"  # type: ignore[assignment]
+    assert dtype_backend is not lib.no_default

     with pandasSQL_builder(con) as pandas_sql:
         return pandas_sql.read_query(
@@ -624,6 +626,7 @@ def read_sql(

     if dtype_backend is lib.no_default:
         dtype_backend = "numpy"  # type: ignore[assignment]
+    assert dtype_backend is not lib.no_default

     with pandasSQL_builder(con) as pandas_sql:
         if isinstance(pandas_sql, SQLiteDatabase):
@@ -634,7 +637,7 @@ def read_sql(
                 coerce_float=coerce_float,
                 parse_dates=parse_dates,
                 chunksize=chunksize,
-                dtype_backend=dtype_backend,  # type: ignore[arg-type]
+                dtype_backend=dtype_backend,
                 dtype=dtype,
             )

@@ -1341,7 +1344,7 @@ class PandasSQL(PandasObject, ABC):
     Subclasses Should define read_query and to_sql.
     """

-    def __enter__(self):
+    def __enter__(self) -> Self:
         return self

     def __exit__(self, *args) -> None:
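
All of the manager changes above apply one pattern: the module-level TypeVar bound to the manager class, together with the matching "self: T" and "cls: type_t[T]" annotations, is replaced by the Self type, which pandas re-exports through pandas._typing (from typing or typing_extensions depending on the Python version). A minimal, self-contained sketch of that pattern, with hypothetical class names and Self imported directly from typing_extensions rather than from pandas:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Assumption for this sketch: Self comes from typing_extensions; the diff
    # above instead imports it from pandas._typing inside TYPE_CHECKING blocks.
    from typing_extensions import Self


class BaseManager:
    # Hypothetical stand-in for BaseBlockManager / BaseArrayManager.
    def __init__(self, data: list[int]) -> None:
        self.data = data

    @classmethod
    def from_data(cls, data: list[int]) -> Self:
        # With the old TypeVar pattern this needed "cls: type_t[T]" and "-> T".
        return cls(data)

    def copy(self) -> Self:
        # Self means "the class this method is called on", so the subclass
        # below gets SubManager back from copy() without any casts.
        return type(self)(list(self.data))

    def __enter__(self) -> Self:
        # The same idea covers context managers (HDFStore / PandasSQL above).
        return self

    def __exit__(self, *args) -> None:
        return None


class SubManager(BaseManager):
    pass


mgr = SubManager([1, 2, 3]).copy()  # type checkers infer SubManager here

The payoff is that a subclass calling an inherited method gets its own type back from the checker, which is what the from_blocks, copy, take and __enter__ signatures above rely on once the per-module TypeVar is gone.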
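
The SingleArrayManager.axes hunk also records why its ignore is needed: the base manager declares axes as a plain, assignable attribute, and mypy rejects overriding a writeable attribute with a read-only property. A small stand-alone illustration of that check (class names here are stand-ins, not the pandas classes):

from __future__ import annotations


class DataManagerSketch:
    # A bare annotated attribute: assignable, so mypy treats it as writeable.
    axes: list[str]


class SingleManagerSketch(DataManagerSketch):
    def __init__(self, axes: list[str]) -> None:
        self._axes = axes

    # error: Cannot override writeable attribute with read-only property
    @property
    def axes(self) -> list[str]:  # type: ignore[override]
        return self._axes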
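
Finally, the read_sql, read_sql_query and read_sql_table hunks add an assert right after the dtype_backend default is resolved. lib.no_default is an enum-based sentinel, so asserting that the value is not the sentinel lets the type checker drop it from the union before the value reaches code typed against the concrete backends, which is why the type: ignore[arg-type] on the read_query call can be removed. A rough sketch of that sentinel-plus-assert idea with stand-in names (the real sentinel and the DtypeBackend alias live in pandas._libs.lib and pandas._typing):

from __future__ import annotations

from enum import Enum
from typing import Literal

DtypeBackend = Literal["numpy_nullable", "pyarrow"]


class NoDefault(Enum):
    # Stand-in for the lib.no_default sentinel used in the hunks above.
    no_default = "NO_DEFAULT"


no_default = NoDefault.no_default


def _read_query(dtype_backend: DtypeBackend | Literal["numpy"]) -> str:
    # Stand-in for the internal call that previously needed an arg-type ignore.
    return dtype_backend


def read_something(
    dtype_backend: DtypeBackend | NoDefault = no_default,
) -> str:
    if dtype_backend is no_default:
        # "numpy" is not part of the public DtypeBackend literal, hence the
        # assignment ignore, mirroring the pandas code above.
        dtype_backend = "numpy"  # type: ignore[assignment]
    # The assert rules the sentinel out explicitly, so the call below is
    # checked against the concrete backend values only.
    assert dtype_backend is not no_default
    return _read_query(dtype_backend)


print(read_something())           # "numpy"
print(read_something("pyarrow"))  # "pyarrow"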