Skip to content

Commit

Permalink
TST: Catch more pyarrow PerformanceWarnings (pandas-dev#48699)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mroeschke authored and phofl committed Sep 22, 2022
1 parent: 359cc5c; commit: 34e2b21
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 29 deletions.
8 changes: 4 additions & 4 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,28 +565,28 @@ def test_isin(dtype, fixed_now_ts):
s = pd.Series(["a", "b", None], dtype=dtype)

with tm.maybe_produces_warning(
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", "c"])
expected = pd.Series([True, False, False])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", pd.NA])
expected = pd.Series([True, False, True])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin([])
expected = pd.Series([False, False, False])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", fixed_now_ts])
expected = pd.Series([True, False, False])
Expand Down
20 changes: 13 additions & 7 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def test_unique(index_or_series_obj):
obj = np.repeat(obj, range(1, len(obj) + 1))
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
result = obj.unique()

Expand Down Expand Up @@ -59,7 +60,8 @@ def test_unique_null(null_obj, index_or_series_obj):
obj = klass(repeated_values, dtype=obj.dtype)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
result = obj.unique()

Expand Down Expand Up @@ -88,10 +90,11 @@ def test_nunique(index_or_series_obj):
obj = np.repeat(obj, range(1, len(obj) + 1))
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
expected = len(obj.unique())
assert obj.nunique(dropna=False) == expected
assert obj.nunique(dropna=False) == expected


@pytest.mark.parametrize("null_obj", [np.nan, None])
Expand All @@ -116,17 +119,20 @@ def test_nunique_null(null_obj, index_or_series_obj):
else:
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
num_unique_values = len(obj.unique())
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
assert obj.nunique() == max(0, num_unique_values - 1)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
pa_version_under2p0
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
assert obj.nunique(dropna=False) == max(0, num_unique_values)

Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,10 @@ def test_unique(self, data, box, method, request):
reason=f"unique has no pyarrow kernel for {pa_dtype}.",
)
)
super().test_unique(data, box, method)
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under2p0, check_stacklevel=False
):
super().test_unique(data, box, method)

@pytest.mark.parametrize("na_sentinel", [-1, -2])
def test_factorize(self, data_for_grouping, na_sentinel, request):
Expand Down Expand Up @@ -1245,7 +1248,10 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request):
reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
)
)
super().test_factorize_equivalence(data_for_grouping, na_sentinel)
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under2p0, check_stacklevel=False
):
super().test_factorize_equivalence(data_for_grouping, na_sentinel)

def test_factorize_empty(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest

from pandas.compat import (
pa_version_under2p0,
pa_version_under6p0,
pa_version_under7p0,
)
Expand Down Expand Up @@ -319,6 +320,26 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
):
super().test_sort_values_frame(data_for_sorting, ascending)

@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
super().test_unique(data, box, method)

@pytest.mark.parametrize("na_sentinel", [-1, -2])
def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0
and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
super().test_factorize_equivalence(data_for_grouping, na_sentinel)


class TestCasting(base.BaseCastingTests):
pass
Expand Down
31 changes: 21 additions & 10 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from pandas.compat import (
IS64,
pa_version_under2p0,
pa_version_under7p0,
)
from pandas.errors import PerformanceWarning
Expand Down Expand Up @@ -229,7 +230,12 @@ def test_unique(self, index_flat):
except NotImplementedError:
pass

result = idx.unique()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0
and getattr(index_flat.dtype, "storage", "") == "pyarrow",
):
result = idx.unique()
tm.assert_index_equal(result, idx_unique)

# nans:
Expand All @@ -248,8 +254,14 @@ def test_unique(self, index_flat):
assert idx_unique_nan.dtype == index.dtype

expected = idx_unique_nan
for i in [idx_nan, idx_unique_nan]:
result = i.unique()
for pos, i in enumerate([idx_nan, idx_unique_nan]):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0
and getattr(index_flat.dtype, "storage", "") == "pyarrow"
and pos == 0,
):
result = i.unique()
tm.assert_index_equal(result, expected)

def test_searchsorted_monotonic(self, index_flat, request):
Expand Down Expand Up @@ -466,13 +478,12 @@ def test_hasnans_isnans(self, index_flat):

@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0
and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
with tm.maybe_produces_warning(
PerformanceWarning,
getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
index_with_missing.sort_values(na_position=na_position)


Expand Down
16 changes: 10 additions & 6 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under7p0
from pandas.compat import (
pa_version_under2p0,
pa_version_under7p0,
)
from pandas.errors import PerformanceWarning

from pandas.core.dtypes.cast import find_common_type
Expand Down Expand Up @@ -573,14 +576,15 @@ def test_intersection_duplicates_all_indexes(index):
# No duplicates in empty indexes
return

def check_intersection_commutative(left, right):
assert left.intersection(right).equals(right.intersection(left))

idx = index
idx_non_unique = idx[[0, 0, 1, 2]]

check_intersection_commutative(idx, idx_non_unique)
assert idx.intersection(idx_non_unique).is_unique
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow",
):
assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx))
assert idx.intersection(idx_non_unique).is_unique


@pytest.mark.parametrize(
Expand Down

0 comments on commit 34e2b21

Please sign in to comment.