@@ -139,8 +139,8 @@ class BaseBlockManager(DataManager):
139
139
140
140
__slots__ = ()
141
141
142
- _blknos : np .ndarray
143
- _blklocs : np .ndarray
142
+ _blknos : npt . NDArray [ np .intp ]
143
+ _blklocs : npt . NDArray [ np .intp ]
144
144
blocks : tuple [Block , ...]
145
145
axes : list [Index ]
146
146
@@ -156,7 +156,7 @@ def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T:
156
156
raise NotImplementedError
157
157
158
158
@property
159
- def blknos (self ):
159
+ def blknos (self ) -> npt . NDArray [ np . intp ] :
160
160
"""
161
161
Suppose we want to find the array corresponding to our i'th column.
162
162
@@ -172,7 +172,7 @@ def blknos(self):
172
172
return self ._blknos
173
173
174
174
@property
175
- def blklocs (self ):
175
+ def blklocs (self ) -> npt . NDArray [ np . intp ] :
176
176
"""
177
177
See blknos.__doc__
178
178
"""
@@ -1151,23 +1151,8 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
1151
1151
1152
1152
block = new_block (values = value , ndim = self .ndim , placement = slice (loc , loc + 1 ))
1153
1153
1154
- for blkno , count in _fast_count_smallints (self .blknos [loc :]):
1155
- blk = self .blocks [blkno ]
1156
- if count == len (blk .mgr_locs ):
1157
- blk .mgr_locs = blk .mgr_locs .add (1 )
1158
- else :
1159
- new_mgr_locs = blk .mgr_locs .as_array .copy ()
1160
- new_mgr_locs [new_mgr_locs >= loc ] += 1
1161
- blk .mgr_locs = BlockPlacement (new_mgr_locs )
1162
-
1163
- # Accessing public blklocs ensures the public versions are initialized
1164
- if loc == self .blklocs .shape [0 ]:
1165
- # np.append is a lot faster, let's use it if we can.
1166
- self ._blklocs = np .append (self ._blklocs , 0 )
1167
- self ._blknos = np .append (self ._blknos , len (self .blocks ))
1168
- else :
1169
- self ._blklocs = np .insert (self ._blklocs , loc , 0 )
1170
- self ._blknos = np .insert (self ._blknos , loc , len (self .blocks ))
1154
+ self ._insert_update_mgr_locs (loc )
1155
+ self ._insert_update_blklocs_and_blknos (loc )
1171
1156
1172
1157
self .axes [0 ] = new_axis
1173
1158
self .blocks += (block ,)
@@ -1184,6 +1169,38 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
1184
1169
stacklevel = 5 ,
1185
1170
)
1186
1171
1172
def _insert_update_mgr_locs(self, loc: int) -> None:
    """
    Shift existing block placements to make room for an insert at ``loc``.

    Each block number found in ``self.blknos[loc:]`` has its ``_mgr_locs``
    replaced by ``_mgr_locs.increment_above(loc)``.

    Parameters
    ----------
    loc : int
        Position at which the new item is being inserted.

    Notes
    -----
    ``increment_above`` is defined outside this block; presumably it adds one
    to every placement that is >= ``loc`` -- TODO confirm against
    ``BlockPlacement``.
    """
    # Only the distinct block numbers are needed; the per-block counts that
    # _fast_count_smallints also yields are deliberately ignored.
    for blkno, _count in _fast_count_smallints(self.blknos[loc:]):
        # Timing note carried over from the original author (units not
        # recorded): .620 this way, .326 of which is in increment_above
        block = self.blocks[blkno]
        block._mgr_locs = block._mgr_locs.increment_above(loc)
1181
+
1182
+ def _insert_update_blklocs_and_blknos (self , loc ) -> None :
1183
+ """
1184
+ When inserting a new Block at location 'loc', we update our
1185
+ _blklocs and _blknos.
1186
+ """
1187
+
1188
+ # Accessing public blklocs ensures the public versions are initialized
1189
+ if loc == self .blklocs .shape [0 ]:
1190
+ # np.append is a lot faster, let's use it if we can.
1191
+ self ._blklocs = np .append (self ._blklocs , 0 )
1192
+ self ._blknos = np .append (self ._blknos , len (self .blocks ))
1193
+ elif loc == 0 :
1194
+ # np.append is a lot faster, let's use it if we can.
1195
+ self ._blklocs = np .append (self ._blklocs [::- 1 ], 0 )[::- 1 ]
1196
+ self ._blknos = np .append (self ._blknos [::- 1 ], len (self .blocks ))[::- 1 ]
1197
+ else :
1198
+ new_blklocs , new_blknos = libinternals .update_blklocs_and_blknos (
1199
+ self .blklocs , self .blknos , loc , len (self .blocks )
1200
+ )
1201
+ self ._blklocs = new_blklocs
1202
+ self ._blknos = new_blknos
1203
+
1187
1204
def idelete (self , indexer ) -> BlockManager :
1188
1205
"""
1189
1206
Delete selected locations, returning a new BlockManager.
@@ -2050,11 +2067,13 @@ def _merge_blocks(
2050
2067
return blocks
2051
2068
2052
2069
2053
- def _fast_count_smallints (arr : np . ndarray ) -> np .ndarray :
2070
+ def _fast_count_smallints (arr : npt . NDArray [ np .intp ]) :
2054
2071
"""Faster version of set(arr) for sequences of small numbers."""
2055
- counts = np .bincount (arr .astype (np .int_ ))
2072
+ counts = np .bincount (arr .astype (np .int_ , copy = False ))
2056
2073
nz = counts .nonzero ()[0 ]
2057
- return np .c_ [nz , counts [nz ]]
2074
+ # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here,
2075
+ # in one benchmark by a factor of 11
2076
+ return zip (nz , counts [nz ])
2058
2077
2059
2078
2060
2079
def _preprocess_slice_or_indexer (
0 commit comments