diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi
index d35d414aaa012..a46a1747d1d8d 100644
--- a/pandas/_libs/internals.pyi
+++ b/pandas/_libs/internals.pyi
@@ -11,6 +11,9 @@ from pandas._typing import (
     T,
 )

+from pandas import Index
+from pandas.core.internals.blocks import Block as B
+
 def slice_len(slc: slice, objlen: int = ...) -> int: ...

@@ -66,3 +69,13 @@ class NumpyBlock(SharedBlock):

 class Block(SharedBlock):
     ...
+
+class BlockManager:
+    blocks: tuple[B, ...]
+    axes: list[Index]
+    _known_consolidated: bool
+    _is_consolidated: bool
+    _blknos: np.ndarray
+    _blklocs: np.ndarray
+
+    def __init__(self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=True): ...
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index d7c5882e92f97..3fd580684a6a2 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -533,3 +533,80 @@ cdef class Block(SharedBlock):
         # set values here the (implicit) call to SharedBlock.__cinit__ will
         # set placement and ndim
         self.values = values
+
+
+@cython.freelist(64)
+cdef class BlockManager:
+    cdef:
+        public tuple blocks
+        public list axes
+        public bint _known_consolidated, _is_consolidated
+        public ndarray _blknos, _blklocs
+
+    def __cinit__(self, blocks, axes, verify_integrity=True):
+        if isinstance(blocks, list):
+            # Backward compat for e.g. pyarrow
+            blocks = tuple(blocks)
+
+        self.blocks = blocks
+        self.axes = axes.copy()  # copy to make sure we are not remotely-mutable
+
+        # Populate known_consolidated, blknos, and blklocs lazily
+        self._known_consolidated = False
+        self._is_consolidated = False
+        # error: Incompatible types in assignment (expression has type "None",
+        # variable has type "ndarray")
+        self._blknos = None  # type: ignore[assignment]
+        # error: Incompatible types in assignment (expression has type "None",
+        # variable has type "ndarray")
+        self._blklocs = None  # type: ignore[assignment]
+
+    # -------------------------------------------------------------------
+    # Pickle
+
+    cpdef __reduce__(self):
+        if len(self.axes) == 1:
+            # SingleBlockManager, __init__ expects Block, axis
+            args = (self.blocks[0], self.axes[0])
+        else:
+            args = (self.blocks, self.axes)
+        return type(self), args
+
+    cpdef __setstate__(self, state):
+        from pandas.core.construction import extract_array
+        from pandas.core.internals.blocks import (
+            ensure_block_shape,
+            new_block,
+        )
+        from pandas.core.internals.managers import ensure_index
+
+        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
+            state = state[3]["0.14.1"]
+            axes = [ensure_index(ax) for ax in state["axes"]]
+            ndim = len(axes)
+
+            for blk in state["blocks"]:
+                vals = blk["values"]
+                # older versions may hold e.g. DatetimeIndex instead of DTA
+                vals = extract_array(vals, extract_numpy=True)
+                blk["values"] = ensure_block_shape(vals, ndim=ndim)
+
+            nbs = [
+                new_block(blk["values"], blk["mgr_locs"], ndim=ndim)
+                for blk in state["blocks"]
+            ]
+            blocks = tuple(nbs)
+            self.blocks = blocks
+            self.axes = axes
+
+        else:
+            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
+
+        self._post_setstate()
+
+    def _post_setstate(self) -> None:
+        self._is_consolidated = False
+        self._known_consolidated = False
+        self._rebuild_blknos_and_blklocs()
+
+    # -------------------------------------------------------------------
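[Review note] For anyone reviewing the pickle plumbing above: `__reduce__` hands pickle a callable plus the arguments needed to rebuild the object, which is how a cdef class with no `__dict__` round-trips. A minimal pure-Python sketch of the same contract (the `Mgr` class here is a toy stand-in, not pandas code):

```python
import pickle


class Mgr:
    """Toy stand-in for the cdef BlockManager; illustration only."""

    def __init__(self, blocks, axes, verify_integrity=True):
        self.blocks = tuple(blocks)  # mirrors the tuple normalization in __cinit__
        self.axes = list(axes)

    def __reduce__(self):
        # Rebuild by calling type(self)(blocks, axes), the same shape the
        # patch uses for the multi-block case.
        return type(self), (self.blocks, self.axes)


m = Mgr([("int64", [0, 1])], [["a", "b"], [0]])
m2 = pickle.loads(pickle.dumps(m))
assert m2.blocks == m.blocks and m2.axes == m.axes
```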
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index ec0678cd87f7e..5b958163159aa 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -489,6 +489,6 @@ cdef class BlockSlider:
         Ensure that we have the original blocks, blknos, and blklocs.
         """
         mgr = self.dummy._mgr
-        mgr.blocks = self.blocks
+        mgr.blocks = tuple(self.blocks)
         mgr._blklocs = self.orig_blklocs
         mgr._blknos = self.orig_blknos
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index 499506bea8c58..ca539eefd3aee 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -21,6 +21,7 @@
     PeriodArray,
     TimedeltaArray,
 )
+from pandas.core.internals import BlockManager

 if TYPE_CHECKING:
     from pandas import (
@@ -222,7 +223,8 @@ def load_newobj(self):
     elif issubclass(cls, TimedeltaArray) and not args:
         arr = np.array([], dtype="m8[ns]")
         obj = cls.__new__(cls, arr, arr.dtype)
-
+    elif cls is BlockManager and not args:
+        obj = cls.__new__(cls, (), [], False)
     else:
         obj = cls.__new__(cls, *args)
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 687c8768fb251..0b0013eeb7147 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -225,7 +225,7 @@ def concatenate_managers(
             b = new_block(new_values, placement=placement, ndim=len(axes))
         blocks.append(b)

-    return BlockManager(blocks, axes)
+    return BlockManager(tuple(blocks), axes)


 def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
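[Review note] The `load_newobj` branch needs dummy arguments because for an extension class `__cinit__` runs inside `cls.__new__` and now expects `(blocks, axes, verify_integrity)`. A quick REPL check of the related invariant that a manager always holds a *tuple* of blocks; `_mgr` is a private accessor, so this is for illustration only:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
mgr = df._mgr  # private; fine for poking at internals in a test or REPL
# __cinit__ coerces list inputs (e.g. from pyarrow callers) to a tuple:
assert isinstance(mgr.blocks, tuple)
```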
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 07ee23cec6648..373d3566e1e8a 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -135,22 +135,16 @@ class BaseBlockManager(DataManager):

     This is *not* a public API class
     """

-    __slots__ = [
-        "axes",
-        "blocks",
-        "_known_consolidated",
-        "_is_consolidated",
-        "_blknos",
-        "_blklocs",
-    ]
+    __slots__ = ()

     _blknos: np.ndarray
     _blklocs: np.ndarray
     blocks: tuple[Block, ...]
     axes: list[Index]

-    # Non-trivially faster than a property
     ndim: int
+    _known_consolidated: bool
+    _is_consolidated: bool

     def __init__(self, blocks, axes, verify_integrity=True):
         raise NotImplementedError
@@ -276,57 +270,6 @@ def arrays(self) -> list[ArrayLike]:
         """
         return [blk.values for blk in self.blocks]

-    def __getstate__(self):
-        block_values = [b.values for b in self.blocks]
-        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
-        axes_array = list(self.axes)
-
-        extra_state = {
-            "0.14.1": {
-                "axes": axes_array,
-                "blocks": [
-                    {"values": b.values, "mgr_locs": b.mgr_locs.indexer}
-                    for b in self.blocks
-                ],
-            }
-        }
-
-        # First three elements of the state are to maintain forward
-        # compatibility with 0.13.1.
-        return axes_array, block_values, block_items, extra_state
-
-    def __setstate__(self, state):
-        def unpickle_block(values, mgr_locs, ndim: int) -> Block:
-            # TODO(EA2D): ndim would be unnecessary with 2D EAs
-            # older pickles may store e.g. DatetimeIndex instead of DatetimeArray
-            values = extract_array(values, extract_numpy=True)
-            return new_block(values, placement=mgr_locs, ndim=ndim)
-
-        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
-            state = state[3]["0.14.1"]
-            self.axes = [ensure_index(ax) for ax in state["axes"]]
-            ndim = len(self.axes)
-
-            for blk in state["blocks"]:
-                vals = blk["values"]
-                # older versions may hold e.g. DatetimeIndex instead of DTA
-                vals = extract_array(vals, extract_numpy=True)
-                blk["values"] = ensure_block_shape(vals, ndim=ndim)
-
-            self.blocks = tuple(
-                unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
-                for b in state["blocks"]
-            )
-        else:
-            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
-
-        self._post_setstate()
-
-    def _post_setstate(self) -> None:
-        self._is_consolidated = False
-        self._known_consolidated = False
-        self._rebuild_blknos_and_blklocs()
-
     def __repr__(self) -> str:
         output = type(self).__name__
         for i, ax in enumerate(self.axes):
@@ -823,7 +766,7 @@ def consolidate(self: T) -> T:
         if self.is_consolidated():
             return self

-        bm = type(self)(self.blocks, self.axes)
+        bm = type(self)(self.blocks, self.axes, verify_integrity=False)
         bm._is_consolidated = False
         bm._consolidate_inplace()
         return bm
@@ -1079,7 +1022,7 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
         )


-class BlockManager(BaseBlockManager):
+class BlockManager(libinternals.BlockManager, BaseBlockManager):
     """
     BaseBlockManager that holds 2D blocks.
     """
@@ -1095,27 +1038,18 @@ def __init__(
         axes: Sequence[Index],
         verify_integrity: bool = True,
     ):
-        self.axes = [ensure_index(ax) for ax in axes]
-        self.blocks: tuple[Block, ...] = tuple(blocks)
-
-        for block in blocks:
-            if self.ndim != block.ndim:
-                raise AssertionError(
-                    f"Number of Block dimensions ({block.ndim}) must equal "
-                    f"number of axes ({self.ndim})"
-                )

         if verify_integrity:
-            self._verify_integrity()
+            assert all(isinstance(x, Index) for x in axes)

-        # Populate known_consolidate, blknos, and blklocs lazily
-        self._known_consolidated = False
-        # error: Incompatible types in assignment (expression has type "None",
-        # variable has type "ndarray")
-        self._blknos = None  # type: ignore[assignment]
-        # error: Incompatible types in assignment (expression has type "None",
-        # variable has type "ndarray")
-        self._blklocs = None  # type: ignore[assignment]
+            for block in blocks:
+                if self.ndim != block.ndim:
+                    raise AssertionError(
+                        f"Number of Block dimensions ({block.ndim}) must equal "
+                        f"number of axes ({self.ndim})"
+                    )
+
+            self._verify_integrity()

     def _verify_integrity(self) -> None:
         mgr_shape = self.shape
@@ -1130,21 +1064,6 @@ def _verify_integrity(self) -> None:
                     f"tot_items: {tot_items}"
                 )

-    @classmethod
-    def _simple_new(cls, blocks: tuple[Block, ...], axes: list[Index]):
-        """
-        Fastpath constructor; does NO validation.
-        """
-        obj = cls.__new__(cls)
-        obj.axes = axes
-        obj.blocks = blocks
-
-        # Populate known_consolidate, blknos, and blklocs lazily
-        obj._known_consolidated = False
-        obj._blknos = None
-        obj._blklocs = None
-        return obj
-
     @classmethod
     def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager:
         """
@@ -1210,7 +1129,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
         new_axes = list(self.axes)
         new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

-        return type(self)._simple_new(tuple(new_blocks), new_axes)
+        return type(self)(tuple(new_blocks), new_axes, verify_integrity=False)

     def iget(self, i: int) -> SingleBlockManager:
         """
@@ -1418,7 +1337,7 @@ def idelete(self, indexer) -> BlockManager:
         nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
         new_columns = self.items[~is_deleted]
         axes = [new_columns, self.axes[1]]
-        return type(self)._simple_new(tuple(nbs), axes)
+        return type(self)(tuple(nbs), axes)

     # ----------------------------------------------------------------
     # Block-wise Operation
@@ -1602,6 +1521,45 @@ def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
         block = new_block(array, placement=slice(0, len(index)), ndim=1)
         return cls(block, index)

+    def __getstate__(self):
+        block_values = [b.values for b in self.blocks]
+        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
+        axes_array = list(self.axes)
+
+        extra_state = {
+            "0.14.1": {
+                "axes": axes_array,
+                "blocks": [
+                    {"values": b.values, "mgr_locs": b.mgr_locs.indexer}
+                    for b in self.blocks
+                ],
+            }
+        }
+
+        # First three elements of the state are to maintain forward
+        # compatibility with 0.13.1.
+        return axes_array, block_values, block_items, extra_state
+
+    def __setstate__(self, state):
+        def unpickle_block(values, mgr_locs, ndim: int) -> Block:
+            # TODO(EA2D): ndim would be unnecessary with 2D EAs
+            # older pickles may store e.g. DatetimeIndex instead of DatetimeArray
+            values = extract_array(values, extract_numpy=True)
+            return new_block(values, placement=mgr_locs, ndim=ndim)
+
+        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
+            state = state[3]["0.14.1"]
+            self.axes = [ensure_index(ax) for ax in state["axes"]]
+            ndim = len(self.axes)
+            self.blocks = tuple(
+                unpickle_block(b["values"], b["mgr_locs"], ndim=ndim)
+                for b in state["blocks"]
+            )
+        else:
+            raise NotImplementedError("pre-0.14.1 pickles are no longer supported")
+
+        self._post_setstate()
+
     def _post_setstate(self):
         pass
diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py
index 779b5100ecdd8..5f03d6709dfa4 100644
--- a/pandas/core/internals/ops.py
+++ b/pandas/core/internals/ops.py
@@ -76,7 +76,7 @@ def operate_blockwise(
     # assert len(slocs) == nlocs, (len(slocs), nlocs)
     # assert slocs == set(range(nlocs)), slocs

-    new_mgr = type(right)(res_blks, axes=right.axes, verify_integrity=False)
+    new_mgr = type(right)(tuple(res_blks), axes=right.axes, verify_integrity=False)
     return new_mgr
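[Review note] With `_simple_new` removed, the cheap path is the ordinary constructor with `verify_integrity=False`: `__cinit__` still normalizes `blocks` to a tuple and copies `axes`, so the fastpath keeps those invariants while skipping only the validation. A hedged sketch of this constructor-flag pattern in plain Python (toy class, not the pandas implementation):

```python
class ToyManager:
    """Toy illustration of replacing a _simple_new classmethod with a flag."""

    def __init__(self, blocks, axes, verify_integrity=True):
        self.blocks = tuple(blocks)  # normalization always happens
        self.axes = list(axes)       # defensive copy always happens
        if verify_integrity:         # only the checks are skippable
            if len({len(self.blocks), 1}) > 1:
                raise AssertionError("inconsistent manager")


# Hot paths (get_slice, operate_blockwise, consolidate) opt out of checks:
fast = ToyManager([("blk", [1, 2])], [["a"], [0, 1]], verify_integrity=False)
```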
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5c605a6b441c6..f918138f1d348 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4312,8 +4312,9 @@ def _needs_reindex_multi(self, axes, method, level) -> bool:
         """
         return False

+    # error: Cannot determine type of 'align'
     @doc(
-        NDFrame.align,
+        NDFrame.align,  # type: ignore[has-type]
         klass=_shared_doc_kwargs["klass"],
         axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
     )
@@ -4465,8 +4466,9 @@ def set_axis(self, labels, axis: Axis = ..., inplace: bool = ...) -> Series | No
     def set_axis(self, labels, axis: Axis = 0, inplace: bool = False):
         return super().set_axis(labels, axis=axis, inplace=inplace)

+    # error: Cannot determine type of 'reindex'
     @doc(
-        NDFrame.reindex,
+        NDFrame.reindex,  # type: ignore[has-type]
         klass=_shared_doc_kwargs["klass"],
         axes=_shared_doc_kwargs["axes"],
         optional_labels=_shared_doc_kwargs["optional_labels"],
@@ -4696,7 +4698,8 @@ def fillna(
     ) -> Series | None:
         ...

-    @doc(NDFrame.fillna, **_shared_doc_kwargs)
+    # error: Cannot determine type of 'fillna'
+    @doc(NDFrame.fillna, **_shared_doc_kwargs)  # type: ignore[has-type]
     def fillna(
         self,
         value=None,
@@ -4742,8 +4745,9 @@ def pop(self, item: Hashable) -> Any:
         """
         return super().pop(item=item)

+    # error: Cannot determine type of 'replace'
     @doc(
-        NDFrame.replace,
+        NDFrame.replace,  # type: ignore[has-type]
         klass=_shared_doc_kwargs["klass"],
         inplace=_shared_doc_kwargs["inplace"],
         replace_iloc=_shared_doc_kwargs["replace_iloc"],
@@ -4791,7 +4795,8 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit):

         return result

-    @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
+    # error: Cannot determine type of 'shift'
+    @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
     def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series:
         return super().shift(
             periods=periods, freq=freq, axis=axis, fill_value=fill_value
@@ -5026,19 +5031,23 @@ def _convert_dtypes(
             result = input_series.copy()
         return result

-    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
+    # error: Cannot determine type of 'isna'
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
     def isna(self) -> Series:
         return generic.NDFrame.isna(self)

-    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
+    # error: Cannot determine type of 'isna'
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
     def isnull(self) -> Series:
         return super().isnull()

-    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])
+    # error: Cannot determine type of 'notna'
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
     def notna(self) -> Series:
         return super().notna()

-    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])
+    # error: Cannot determine type of 'notna'
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
     def notnull(self) -> Series:
         return super().notnull()
@@ -5133,7 +5142,8 @@ def dropna(self, axis=0, inplace=False, how=None):
     # ----------------------------------------------------------------------
     # Time series-oriented methods

-    @doc(NDFrame.asfreq, **_shared_doc_kwargs)
+    # error: Cannot determine type of 'asfreq'
+    @doc(NDFrame.asfreq, **_shared_doc_kwargs)  # type: ignore[has-type]
     def asfreq(
         self,
         freq,
@@ -5150,7 +5160,8 @@ def asfreq(
             fill_value=fill_value,
         )

-    @doc(NDFrame.resample, **_shared_doc_kwargs)
+    # error: Cannot determine type of 'resample'
+    @doc(NDFrame.resample, **_shared_doc_kwargs)  # type: ignore[has-type]
     def resample(
         self,
         rule,
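[Review note] All of these `# type: ignore[has-type]` additions silence one recurring mypy complaint: with part of `BlockManager` now defined in an extension module, mypy hits definition cycles and reports "Cannot determine type of 'align'" (etc.) at the `@doc(...)` call sites. The convention used throughout is to keep the mypy error text as a comment and pin the ignore to the offending line. A hedged sketch of the same idiom outside pandas (the `LoggedSeries` subclass is hypothetical, for illustration only):

```python
import pandas as pd
from pandas.util._decorators import doc  # same decorator as in the diff


class LoggedSeries(pd.Series):
    # error: Cannot determine type of 'to_frame'
    @doc(pd.Series.to_frame)  # type: ignore[has-type]
    def to_frame(self, name=None):
        # reuse the parent docstring, add behavior
        return super().to_frame(name)
```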
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
index 8305ff64c42c6..abf6128699a21 100644
--- a/pandas/io/parsers/c_parser_wrapper.py
+++ b/pandas/io/parsers/c_parser_wrapper.py
@@ -17,7 +17,10 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
         ParserBase.__init__(self, kwds)

         # #2442
-        kwds["allow_leading_cols"] = self.index_col is not False
+        # error: Cannot determine type of 'index_col'
+        kwds["allow_leading_cols"] = (
+            self.index_col is not False  # type: ignore[has-type]
+        )

         # GH20529, validate usecol arg before TextReader
         kwds["usecols"] = self.usecols
@@ -54,31 +57,44 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
             raise

         self.unnamed_cols = self._reader.unnamed_cols

-        passed_names = self.names is None
+        # error: Cannot determine type of 'names'
+        passed_names = self.names is None  # type: ignore[has-type]

         if self._reader.header is None:
             self.names = None
         else:
             if len(self._reader.header) > 1:
                 # we have a multi index in the columns
+                # error: Cannot determine type of 'names'
+                # error: Cannot determine type of 'index_names'
+                # error: Cannot determine type of 'col_names'
                 (
-                    self.names,
+                    self.names,  # type: ignore[has-type]
                     self.index_names,
                     self.col_names,
                     passed_names,
                 ) = self._extract_multi_indexer_columns(
-                    self._reader.header, self.index_names, self.col_names, passed_names
+                    self._reader.header,
+                    self.index_names,  # type: ignore[has-type]
+                    self.col_names,  # type: ignore[has-type]
+                    passed_names,
                 )
             else:
-                self.names = list(self._reader.header[0])
+                # error: Cannot determine type of 'names'
+                self.names = list(self._reader.header[0])  # type: ignore[has-type]

-        if self.names is None:
+        # error: Cannot determine type of 'names'
+        if self.names is None:  # type: ignore[has-type]
             if self.prefix:
-                self.names = [
+                # error: Cannot determine type of 'names'
+                self.names = [  # type: ignore[has-type]
                     f"{self.prefix}{i}" for i in range(self._reader.table_width)
                 ]
             else:
-                self.names = list(range(self._reader.table_width))
+                # error: Cannot determine type of 'names'
+                self.names = list(  # type: ignore[has-type]
+                    range(self._reader.table_width)
+                )

         # gh-9755
         #
@@ -88,7 +104,8 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
         #
         # once names has been filtered, we will
         # then set orig_names again to names
-        self.orig_names = self.names[:]
+        # error: Cannot determine type of 'names'
+        self.orig_names = self.names[:]  # type: ignore[has-type]

         if self.usecols:
             usecols = self._evaluate_usecols(self.usecols, self.orig_names)
@@ -101,27 +118,49 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
             ):
                 self._validate_usecols_names(usecols, self.orig_names)

-            if len(self.names) > len(usecols):
-                self.names = [
+            # error: Cannot determine type of 'names'
+            if len(self.names) > len(usecols):  # type: ignore[has-type]
+                # error: Cannot determine type of 'names'
+                self.names = [  # type: ignore[has-type]
                     n
-                    for i, n in enumerate(self.names)
+                    # error: Cannot determine type of 'names'
+                    for i, n in enumerate(self.names)  # type: ignore[has-type]
                     if (i in usecols or n in usecols)
                 ]

-            if len(self.names) < len(usecols):
-                self._validate_usecols_names(usecols, self.names)
+            # error: Cannot determine type of 'names'
+            if len(self.names) < len(usecols):  # type: ignore[has-type]
+                # error: Cannot determine type of 'names'
+                self._validate_usecols_names(
+                    usecols,
+                    self.names,  # type: ignore[has-type]
+                )

-        self._validate_parse_dates_presence(self.names)
+        # error: Cannot determine type of 'names'
+        self._validate_parse_dates_presence(self.names)  # type: ignore[has-type]
         self._set_noconvert_columns()

-        self.orig_names = self.names
+        # error: Cannot determine type of 'names'
+        self.orig_names = self.names  # type: ignore[has-type]

         if not self._has_complex_date_col:
-            if self._reader.leading_cols == 0 and is_index_col(self.index_col):
+            # error: Cannot determine type of 'index_col'
+            if self._reader.leading_cols == 0 and is_index_col(
+                self.index_col  # type: ignore[has-type]
+            ):
                 self._name_processed = True
-                (index_names, self.names, self.index_col) = self._clean_index_names(
-                    self.names, self.index_col, self.unnamed_cols
+                (
+                    index_names,
+                    # error: Cannot determine type of 'names'
+                    self.names,  # type: ignore[has-type]
+                    self.index_col,
+                ) = self._clean_index_names(
+                    # error: Cannot determine type of 'names'
+                    self.names,  # type: ignore[has-type]
+                    # error: Cannot determine type of 'index_col'
+                    self.index_col,  # type: ignore[has-type]
+                    self.unnamed_cols,
                 )

                 if self.index_names is None:
@@ -150,8 +189,15 @@ def _set_noconvert_columns(self):
         undergo such conversions.
         """
         assert self.orig_names is not None
-        col_indices = [self.orig_names.index(x) for x in self.names]
-        noconvert_columns = self._set_noconvert_dtype_columns(col_indices, self.names)
+        # error: Cannot determine type of 'names'
+        col_indices = [
+            self.orig_names.index(x) for x in self.names  # type: ignore[has-type]
+        ]
+        # error: Cannot determine type of 'names'
+        noconvert_columns = self._set_noconvert_dtype_columns(
+            col_indices,
+            self.names,  # type: ignore[has-type]
+        )
         for col in noconvert_columns:
             self._reader.set_noconvert(col)
@@ -162,7 +208,8 @@ def read(self, nrows=None):
         try:
             data = self._reader.read(nrows)
         except StopIteration:
-            if self._first_chunk:
+            # error: Cannot determine type of '_first_chunk'
+            if self._first_chunk:  # type: ignore[has-type]
                 self._first_chunk = False
                 names = self._maybe_dedup_names(self.orig_names)
                 index, columns, col_dict = self._get_empty_meta(
@@ -187,7 +234,8 @@ def read(self, nrows=None):
         # Done with first read, next time raise StopIteration
         self._first_chunk = False

-        names = self.names
+        # error: Cannot determine type of 'names'
+        names = self.names  # type: ignore[has-type]

         if self._reader.leading_cols:
             if self._has_complex_date_col:
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 37f553c724c9e..9f62d63c680f6 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -124,13 +124,17 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
         # The original set is stored in self.original_columns.
         if len(self.columns) > 1:
             # we are processing a multi index column
+            # error: Cannot determine type of 'index_names'
+            # error: Cannot determine type of 'col_names'
             (
                 self.columns,
                 self.index_names,
                 self.col_names,
                 _,
             ) = self._extract_multi_indexer_columns(
-                self.columns, self.index_names, self.col_names
+                self.columns,
+                self.index_names,  # type: ignore[has-type]
+                self.col_names,  # type: ignore[has-type]
             )
             # Update list of original names to include all indices.
             self.num_original_columns = len(self.columns)
@@ -246,7 +250,8 @@ def read(self, rows=None):
         try:
             content = self._get_lines(rows)
         except StopIteration:
-            if self._first_chunk:
+            # error: Cannot determine type of '_first_chunk'
+            if self._first_chunk:  # type: ignore[has-type]
                 content = []
             else:
                 self.close()
@@ -259,8 +264,12 @@ def read(self, rows=None):
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
+            # error: Cannot determine type of 'index_col'
             index, columns, col_dict = self._get_empty_meta(
-                names, self.index_col, self.index_names, self.dtype
+                names,
+                self.index_col,  # type: ignore[has-type]
+                self.index_names,
+                self.dtype,
             )
             columns = self._maybe_make_multi_index_columns(columns, self.col_names)
             return index, columns, col_dict
@@ -287,7 +296,8 @@ def _exclude_implicit_index(self, alldata):

         offset = 0
         if self._implicit_index:
-            offset = len(self.index_col)
+            # error: Cannot determine type of 'index_col'
+            offset = len(self.index_col)  # type: ignore[has-type]

         if self._col_indices is not None and len(names) != len(self._col_indices):
             names = [names[i] for i in sorted(self._col_indices)]
@@ -428,7 +438,9 @@ def _infer_columns(self):
                     # line for the rest of the parsing code
                     if hr == header[-1]:
                         lc = len(this_columns)
-                        ic = len(self.index_col) if self.index_col is not None else 0
+                        # error: Cannot determine type of 'index_col'
+                        sic = self.index_col  # type: ignore[has-type]
+                        ic = len(sic) if sic is not None else 0
                         unnamed_count = len(this_unnamed_cols)

                         if lc != unnamed_count and lc - ic > unnamed_count:
@@ -838,7 +850,9 @@ def _get_index_name(self, columns):
         if line is not None:
             # leave it 0, #2442
             # Case 1
-            if self.index_col is not False:
+            # error: Cannot determine type of 'index_col'
+            index_col = self.index_col  # type: ignore[has-type]
+            if index_col is not False:
                 implicit_first_cols = len(line) - self.num_original_columns

                 # Case 0
@@ -883,7 +897,13 @@ def _rows_to_cols(self, content):
         # Check that there are no rows with too many
         # elements in their row (rows with too few
         # elements are padded with NaN).
-        if max_len > col_len and self.index_col is not False and self.usecols is None:
+        # error: Non-overlapping identity check (left operand type: "List[int]",
+        # right operand type: "Literal[False]")
+        if (
+            max_len > col_len
+            and self.index_col is not False  # type: ignore[comparison-overlap]
+            and self.usecols is None
+        ):
             footers = self.skipfooter if self.skipfooter else 0
             bad_lines = []
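[Review note] The python_parser changes also use a second silencing idiom: bind the attribute to a local once (`sic = self.index_col`), put the single ignore on that binding, and use the typed local afterwards instead of sprinkling ignores on every use. A small self-contained sketch of the idiom (names are hypothetical, not pandas code):

```python
from typing import List, Optional


class ToyParser:
    def __init__(self, index_col: Optional[List[int]] = None) -> None:
        self.index_col = index_col

    def _implicit_offset(self) -> int:
        # Bind once, then use the local everywhere; if the attribute's type
        # were undeterminable (e.g. due to an import cycle), the one
        # `# type: ignore[has-type]` would go on this binding line only.
        sic = self.index_col
        return len(sic) if sic is not None else 0


assert ToyParser([0, 1])._implicit_offset() == 2
```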
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 7d4f3b62d183a..a1c5810ba8bb8 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -234,9 +234,10 @@ def create_mgr(descr, item_shape=None):
         )
         num_offset += len(placement)

+    sblocks = sorted(blocks, key=lambda b: b.mgr_locs[0])
     return BlockManager(
-        sorted(blocks, key=lambda b: b.mgr_locs[0]),
-        [mgr_items] + [np.arange(n) for n in item_shape],
+        tuple(sblocks),
+        [mgr_items] + [Index(np.arange(n)) for n in item_shape],
     )


@@ -409,7 +410,7 @@ def test_iget(self):
         block = new_block(
             values=values.copy(), placement=np.arange(3), ndim=values.ndim
         )
-        mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
+        mgr = BlockManager(blocks=(block,), axes=[cols, Index(np.arange(3))])

         tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
         tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
@@ -816,7 +817,7 @@ def test_equals_block_order_different_dtypes(self, mgr_string):
         bm = create_mgr(mgr_string)
         block_perms = itertools.permutations(bm.blocks)
         for bm_perm in block_perms:
-            bm_this = BlockManager(bm_perm, bm.axes)
+            bm_this = BlockManager(tuple(bm_perm), bm.axes)
             assert bm.equals(bm_this)
             assert bm_this.equals(bm)
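[Review note] Summing up what the test updates encode: a `BlockManager` must now be handed a tuple of blocks and real `Index` objects for its axes (bare `np.arange` no longer passes the `verify_integrity` assertion). A hedged sketch of constructing one directly, using the same internal helpers the diff touches (`new_block`, `BlockManager` are private APIs and may change):

```python
import numpy as np
from pandas import Index
from pandas.core.internals import BlockManager
from pandas.core.internals.blocks import new_block

# One 2x3 float block: 2 columns ("items") by 3 rows.
values = np.arange(6, dtype=np.float64).reshape(2, 3)
block = new_block(values, placement=np.arange(2), ndim=2)

axes = [Index(["a", "b"]), Index(np.arange(3))]
mgr = BlockManager((block,), axes)  # tuple of blocks + Index axes, as the tests now pass
assert isinstance(mgr.blocks, tuple)
```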