Skip to content

Commit cb16826

Browse files
committed
Fixed failed pre-commit.ci hooks: formatting errors in algorithms.py, inconsistent-namespace-usage in test_isin.py, and an unsorted whatsnew entry
1 parent dbe3673 commit cb16826

File tree

3 files changed

+24
-21
lines changed

3 files changed

+24
-21
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,11 +800,11 @@ Other
800800
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
801801
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
802802
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
803+
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
803804
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
804805
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
805806
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`)
806807
- Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`)
807-
- Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`)
808808
- Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`)
809809
- Bug in DataFrame Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
810810
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)

pandas/core/algorithms.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
iNaT,
2424
lib,
2525
)
26-
2726
from pandas._libs.missing import NA
28-
2927
from pandas._typing import (
3028
AnyArrayLike,
3129
ArrayLike,
@@ -543,23 +541,24 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
543541
elif isinstance(values.dtype, ExtensionDtype):
544542
return isin(np.asarray(comps_array), np.asarray(values))
545543

546-
# GH16012
547-
# Ensure np.isin doesn't get object types or it *may* throw an exception
548-
# Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
549-
# isin is faster for small sizes
550-
551544
# GH60678
552545
# Check whether values contain <NA>; otherwise np.in1d throws an exception
546+
553547
values_contains_NA = False
554-
555-
if comps_array.dtype != object and len(values) <= 26:
548+
549+
if comps_array.dtype != object and len(values) <= 26:
556550
values_contains_NA = any(v is NA for v in values)
557551

552+
# GH16012
553+
# Ensure np.isin doesn't get object types or it *may* throw an exception
554+
# Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
555+
# isin is faster for small sizes
556+
558557
if (
559558
len(comps_array) > _MINIMUM_COMP_ARR_LEN
560559
and len(values) <= 26
561560
and comps_array.dtype != object
562-
and values_contains_NA == False
561+
and not values_contains_NA
563562
):
564563
# If the values include nan we need to check for nan explicitly
565564
# since np.nan is not equal to np.nan

pandas/tests/series/methods/test_isin.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -211,26 +211,30 @@ def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch):
211211
tm.assert_series_equal(result, expected)
212212

213213

214-
@pytest.mark.parametrize("dtype, data, values, expected", [
215-
("boolean", [pd.NA, False, True], [False, pd.NA], [True, True, False]),
216-
("Int64", [pd.NA, 2, 1], [1, pd.NA], [True, False, True]),
217-
("boolean", [pd.NA, False, True], [pd.NA, True, 'a', 20], [True, False, True]),
218-
("boolean", [pd.NA, False, True], [], [False, False, False]),
219-
("Float64", [20.0, 30.0, pd.NA], [pd.NA], [False, False, True]),
220-
])
214+
@pytest.mark.parametrize(
215+
"dtype, data, values, expected",
216+
[
217+
("boolean", [pd.NA, False, True], [False, pd.NA], [True, True, False]),
218+
("Int64", [pd.NA, 2, 1], [1, pd.NA], [True, False, True]),
219+
("boolean", [pd.NA, False, True], [pd.NA, True, "a", 20], [True, False, True]),
220+
("boolean", [pd.NA, False, True], [], [False, False, False]),
221+
("Float64", [20.0, 30.0, pd.NA], [pd.NA], [False, False, True]),
222+
],
223+
)
221224
def test_isin_large_series_and_pdNA(dtype, data, values, expected, monkeypatch):
222225
# https://github.com/pandas-dev/pandas/issues/60678
223-
# combination of large series (> _MINIMUM_COMP_ARR_LEN elements) and
224-
# values contains pdNA
226+
# combination of large series (> _MINIMUM_COMP_ARR_LEN elements) and
227+
# values contains pdNA
225228
min_isin_comp = 2
226229
ser = Series(data, dtype=dtype)
227-
expected = pd.Series(expected, dtype="boolean")
230+
expected = Series(expected, dtype="boolean")
228231

229232
with monkeypatch.context() as m:
230233
m.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp)
231234
result = ser.isin(values)
232235
tm.assert_series_equal(result, expected)
233236

237+
234238
def test_isin_complex_numbers():
235239
# GH 17927
236240
array = [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j]

0 commit comments

Comments
 (0)