TST: Catch more pyarrow PerformanceWarnings (pandas-dev#48699)

phofl · Sep 22, 2022 · 34e2b21
1 parent 359cc5c
commit 34e2b21
Show file tree

Hide file tree

Showing 6 changed files with 77 additions and 29 deletions.
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -565,28 +565,28 @@ def test_isin(dtype, fixed_now_ts):
     s = pd.Series(["a", "b", None], dtype=dtype)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+        PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
     ):
         result = s.isin(["a", "c"])
     expected = pd.Series([True, False, False])
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+        PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
     ):
         result = s.isin(["a", pd.NA])
     expected = pd.Series([True, False, True])
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+        PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
     ):
         result = s.isin([])
     expected = pd.Series([False, False, False])
     tm.assert_series_equal(result, expected)
 
     with tm.maybe_produces_warning(
-        PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+        PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
     ):
         result = s.isin(["a", fixed_now_ts])
     expected = pd.Series([True, False, False])

diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
@@ -17,7 +17,8 @@ def test_unique(index_or_series_obj):
     obj = np.repeat(obj, range(1, len(obj) + 1))
     with tm.maybe_produces_warning(
         PerformanceWarning,
-        pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+        pa_version_under2p0
+        and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
     ):
         result = obj.unique()
 
@@ -59,7 +60,8 @@ def test_unique_null(null_obj, index_or_series_obj):
     obj = klass(repeated_values, dtype=obj.dtype)
     with tm.maybe_produces_warning(
         PerformanceWarning,
-        pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+        pa_version_under2p0
+        and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
     ):
         result = obj.unique()
 
@@ -88,10 +90,11 @@ def test_nunique(index_or_series_obj):
     obj = np.repeat(obj, range(1, len(obj) + 1))
     with tm.maybe_produces_warning(
         PerformanceWarning,
-        pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+        pa_version_under2p0
+        and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
     ):
         expected = len(obj.unique())
-    assert obj.nunique(dropna=False) == expected
+        assert obj.nunique(dropna=False) == expected
 
 
 @pytest.mark.parametrize("null_obj", [np.nan, None])
@@ -116,17 +119,20 @@ def test_nunique_null(null_obj, index_or_series_obj):
     else:
         with tm.maybe_produces_warning(
             PerformanceWarning,
-            pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+            pa_version_under2p0
+            and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
         ):
             num_unique_values = len(obj.unique())
         with tm.maybe_produces_warning(
             PerformanceWarning,
-            pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+            pa_version_under2p0
+            and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
         ):
             assert obj.nunique() == max(0, num_unique_values - 1)
         with tm.maybe_produces_warning(
             PerformanceWarning,
-            pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+            pa_version_under2p0
+            and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
         ):
             assert obj.nunique(dropna=False) == max(0, num_unique_values)
 

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -1215,7 +1215,10 @@ def test_unique(self, data, box, method, request):
                     reason=f"unique has no pyarrow kernel for {pa_dtype}.",
                 )
             )
-        super().test_unique(data, box, method)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under2p0, check_stacklevel=False
+        ):
+            super().test_unique(data, box, method)
 
     @pytest.mark.parametrize("na_sentinel", [-1, -2])
     def test_factorize(self, data_for_grouping, na_sentinel, request):
@@ -1245,7 +1248,10 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request):
                     reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
                 )
             )
-        super().test_factorize_equivalence(data_for_grouping, na_sentinel)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under2p0, check_stacklevel=False
+        ):
+            super().test_factorize_equivalence(data_for_grouping, na_sentinel)
 
     def test_factorize_empty(self, data, request):
         pa_dtype = data.dtype.pyarrow_dtype

diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
@@ -19,6 +19,7 @@
 import pytest
 
 from pandas.compat import (
+    pa_version_under2p0,
     pa_version_under6p0,
     pa_version_under7p0,
 )
@@ -319,6 +320,26 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
         ):
             super().test_sort_values_frame(data_for_sorting, ascending)
 
+    @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
+    @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
+    def test_unique(self, data, box, method):
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            super().test_unique(data, box, method)
+
+    @pytest.mark.parametrize("na_sentinel", [-1, -2])
+    def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under2p0
+            and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            super().test_factorize_equivalence(data_for_grouping, na_sentinel)
+
 
 class TestCasting(base.BaseCastingTests):
     pass

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -10,6 +10,7 @@
 
 from pandas.compat import (
     IS64,
+    pa_version_under2p0,
     pa_version_under7p0,
 )
 from pandas.errors import PerformanceWarning
@@ -229,7 +230,12 @@ def test_unique(self, index_flat):
         except NotImplementedError:
             pass
 
-        result = idx.unique()
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under2p0
+            and getattr(index_flat.dtype, "storage", "") == "pyarrow",
+        ):
+            result = idx.unique()
         tm.assert_index_equal(result, idx_unique)
 
         # nans:
@@ -248,8 +254,14 @@ def test_unique(self, index_flat):
         assert idx_unique_nan.dtype == index.dtype
 
         expected = idx_unique_nan
-        for i in [idx_nan, idx_unique_nan]:
-            result = i.unique()
+        for pos, i in enumerate([idx_nan, idx_unique_nan]):
+            with tm.maybe_produces_warning(
+                PerformanceWarning,
+                pa_version_under2p0
+                and getattr(index_flat.dtype, "storage", "") == "pyarrow"
+                and pos == 0,
+            ):
+                result = i.unique()
             tm.assert_index_equal(result, expected)
 
     def test_searchsorted_monotonic(self, index_flat, request):
@@ -466,13 +478,12 @@ def test_hasnans_isnans(self, index_flat):
 
 @pytest.mark.parametrize("na_position", [None, "middle"])
 def test_sort_values_invalid_na_position(index_with_missing, na_position):
-    with tm.maybe_produces_warning(
-        PerformanceWarning,
-        pa_version_under7p0
-        and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
-        check_stacklevel=False,
-    ):
-        with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+    with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
             index_with_missing.sort_values(na_position=na_position)
 
 

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -8,7 +8,10 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
+from pandas.compat import (
+    pa_version_under2p0,
+    pa_version_under7p0,
+)
 from pandas.errors import PerformanceWarning
 
 from pandas.core.dtypes.cast import find_common_type
@@ -573,14 +576,15 @@ def test_intersection_duplicates_all_indexes(index):
         # No duplicates in empty indexes
         return
 
-    def check_intersection_commutative(left, right):
-        assert left.intersection(right).equals(right.intersection(left))
-
     idx = index
     idx_non_unique = idx[[0, 0, 1, 2]]
 
-    check_intersection_commutative(idx, idx_non_unique)
-    assert idx.intersection(idx_non_unique).is_unique
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+    ):
+        assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx))
+        assert idx.intersection(idx_non_unique).is_unique
 
 
 @pytest.mark.parametrize(