Skip to content

Commit 02d988c

Browse files
authored
PERF: Improve performance for MultiIndex.isin (#48622)
* PERF: Improve performance for MultiIndex.isin
* PERF: Improve performance for MultiIndex.isin
* Fix gh ref
1 parent 7eabf30 commit 02d988c

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

asv_bench/benchmarks/multiindex_object.py

+32
Original file line number | Diff line number | Diff line change
@@ -299,4 +299,36 @@ def time_unique_dups(self, dtype_val):
299299
self.midx_dups.unique()
300300

301301

302+
class Isin:
303+
params = [
304+
("string", "int", "datetime"),
305+
]
306+
param_names = ["dtype"]
307+
308+
def setup(self, dtype):
309+
N = 10**5
310+
level1 = range(1000)
311+
312+
level2 = date_range(start="1/1/2000", periods=N // 1000)
313+
dates_midx = MultiIndex.from_product([level1, level2])
314+
315+
level2 = range(N // 1000)
316+
int_midx = MultiIndex.from_product([level1, level2])
317+
318+
level2 = tm.makeStringIndex(N // 1000).values
319+
str_midx = MultiIndex.from_product([level1, level2])
320+
321+
data = {
322+
"datetime": dates_midx,
323+
"int": int_midx,
324+
"string": str_midx,
325+
}
326+
327+
self.midx = data[dtype]
328+
self.values = self.midx[:100]
329+
330+
def time_isin(self, dtype):
331+
self.midx.isin(self.values)
332+
333+
302334
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.6.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ Performance improvements
107107
- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`)
108108
- Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`)
109109
- Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`)
110+
- Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`)
110111
- Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
111112
- Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
112113
- Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)

pandas/core/indexes/multi.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -3854,8 +3854,7 @@ def delete(self, loc) -> MultiIndex:
38543854
@doc(Index.isin)
38553855
def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
38563856
if level is None:
3857-
values = MultiIndex.from_tuples(values, names=self.names)._values
3858-
return algos.isin(self._values, values)
3857+
return MultiIndex.from_tuples(algos.unique(values)).get_indexer(self) != -1
38593858
else:
38603859
num = self._get_level_number(level)
38613860
levs = self.get_level_values(num)

0 commit comments

Comments
 (0)