From e1f1254395a36deea4d8c9707a615805890ec271 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 15 Apr 2021 14:01:15 +0100 Subject: [PATCH 1/4] test_astype_ignores_errors_for_extension_dtypes --- pandas/tests/series/methods/test_astype.py | 28 +++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index d23c44733949a..bebe6948cff9c 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -10,6 +10,7 @@ import pytest from pandas._libs.tslibs import iNaT +import pandas.util._test_decorators as td from pandas import ( NA, @@ -246,25 +247,34 @@ def test_td64_series_astype_object(self): assert result.dtype == np.object_ @pytest.mark.parametrize( - "values", + "data, dtype", [ - Series(["x", "y", "z"], dtype="string"), - Series(["x", "y", "z"], dtype="category"), - Series(3 * [Timestamp("2020-01-01", tz="UTC")]), - Series(3 * [Interval(0, 1)]), + (["x", "y", "z"], "string"), + pytest.param( + ["x", "y", "z"], + "arrow_string", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), + ), + (["x", "y", "z"], "category"), + (3 * [Timestamp("2020-01-01", tz="UTC")], None), + (3 * [Interval(0, 1)], None), ], ) @pytest.mark.parametrize("errors", ["raise", "ignore"]) - def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): + def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): # https://github.com/pandas-dev/pandas/issues/35471 + + from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 + + ser = Series(data, dtype=dtype) if errors == "ignore": - expected = values - result = values.astype(float, errors="ignore") + expected = ser + result = ser.astype(float, errors="ignore") tm.assert_series_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): - values.astype(float, errors=errors) + ser.astype(float, errors=errors) @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) def test_astype_from_float_to_str(self, dtype): From adef0eac553b543e901fb3bde530ec596dfcdaae Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 15 Apr 2021 16:22:50 +0100 Subject: [PATCH 2/4] test_update_extension_array_series --- pandas/tests/series/methods/test_update.py | 50 ++++++++++++---------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py index 4f585a6ea029a..9a64877cb92ff 100644 --- a/pandas/tests/series/methods/test_update.py +++ b/pandas/tests/series/methods/test_update.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( CategoricalDtype, DataFrame, @@ -9,6 +11,7 @@ Timestamp, ) import pandas._testing as tm +from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401 class TestUpdate: @@ -82,37 +85,38 @@ def test_update_from_non_series(self, series, other, expected): tm.assert_series_equal(series, expected) @pytest.mark.parametrize( - "result, target, expected", + "data, other, expected, dtype", [ - ( - Series(["a", None], dtype="string"), - Series([None, "b"], dtype="string"), - Series(["a", "b"], dtype="string"), - ), - ( - Series([1, None], dtype="Int64"), - Series([None, 2], dtype="Int64"), - Series([1, 2], dtype="Int64"), + (["a", None], [None, "b"], ["a", "b"], "string"), + pytest.param( + ["a", None], + [None, "b"], + ["a", "b"], + "arrow_string", + marks=td.skip_if_no("pyarrow", min_version="1.0.0"), ), + ([1, None], [None, 2], [1, 2], "Int64"), + ([True, None], [None, False], [True, False], "boolean"), ( - Series([True, None], dtype="boolean"), - Series([None, False], dtype="boolean"), - Series([True, False], dtype="boolean"), + ["a", None], + [None, "b"], + ["a", "b"], + CategoricalDtype(categories=["a", "b"]), ), ( - Series(["a", None], dtype=CategoricalDtype(categories=["a", "b"])), - Series([None, "b"], dtype=CategoricalDtype(categories=["a", "b"])), - Series(["a", "b"], dtype=CategoricalDtype(categories=["a", "b"])), - ), - ( - Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT]), - Series([NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")]), - Series([Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2), + [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT], + [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")], + [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2, + "datetime64[ns, Europe/London]", ), ], ) - def test_update_extension_array_series(self, result, target, expected): - result.update(target) + def test_update_extension_array_series(self, data, other, expected, dtype): + result = Series(data, dtype=dtype) + other = Series(other, dtype=dtype) + expected = Series(expected, dtype=dtype) + + result.update(other) tm.assert_series_equal(result, expected) def test_update_with_categorical_type(self): From 2c0fadda1ff93b844a01f4dafe47e001772c8c52 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 15 Apr 2021 16:40:58 +0100 Subject: [PATCH 3/4] test_fullmatch --- pandas/tests/strings/test_find_replace.py | 32 ++++++++++++----------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index ef27d582b4e0f..ab95b2071ae10 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -364,26 +364,28 @@ def test_match(): def test_fullmatch(): # GH 32806 - values = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"]) - result = values.str.fullmatch(".*BAD[_]+.*BAD") - exp = Series([True, False, np.nan, False]) - tm.assert_series_equal(result, exp) - - # Make sure that the new string arrays work - string_values = Series( - ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype="string" - ) - result = string_values.str.fullmatch(".*BAD[_]+.*BAD") - # Result is nullable boolean with StringDtype - string_exp = Series([True, False, np.nan, False], dtype="boolean") - tm.assert_series_equal(result, string_exp) + ser = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"]) + result = ser.str.fullmatch(".*BAD[_]+.*BAD") + expected = Series([True, False, np.nan, False]) + tm.assert_series_equal(result, expected) - values = Series(["ab", "AB", "abc", "ABC"]) - result = values.str.fullmatch("ab", case=False) + ser = Series(["ab", "AB", "abc", "ABC"]) + result = ser.str.fullmatch("ab", case=False) expected = Series([True, True, False, False]) tm.assert_series_equal(result, expected) +def test_fullmatch_nullable_string_dtype(nullable_string_dtype): + ser = Series( + ["fooBAD__barBAD", "BAD_BADleroybrown", None, "foo"], + dtype=nullable_string_dtype, + ) + result = ser.str.fullmatch(".*BAD[_]+.*BAD") + # Result is nullable boolean + expected = Series([True, False, np.nan, False], dtype="boolean") + tm.assert_series_equal(result, expected) + + def test_findall(): values = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"]) From 50191a01c22f92530af672936621c8204891a9e9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 15 Apr 2021 16:55:29 +0100 Subject: [PATCH 4/4] test_repeat_with_null --- pandas/tests/strings/test_strings.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 95ac237597bc4..a809446f0bc06 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -136,17 +136,23 @@ def test_repeat(): tm.assert_series_equal(rs, xp) -def test_repeat_with_null(): +def test_repeat_with_null(nullable_string_dtype, request): # GH: 31632 - values = Series(["a", None], dtype="string") - result = values.str.repeat([3, 4]) - exp = Series(["aaa", None], dtype="string") - tm.assert_series_equal(result, exp) - values = Series(["a", "b"], dtype="string") - result = values.str.repeat([3, None]) - exp = Series(["aaa", None], dtype="string") - tm.assert_series_equal(result, exp) + if nullable_string_dtype == "arrow_string": + reason = 'Attribute "dtype" are different' + mark = pytest.mark.xfail(reason=reason) + request.node.add_marker(mark) + + ser = Series(["a", None], dtype=nullable_string_dtype) + result = ser.str.repeat([3, 4]) + expected = Series(["aaa", None], dtype=nullable_string_dtype) + tm.assert_series_equal(result, expected) + + ser = Series(["a", "b"], dtype=nullable_string_dtype) + result = ser.str.repeat([3, None]) + expected = Series(["aaa", None], dtype=nullable_string_dtype) + tm.assert_series_equal(result, expected) def test_empty_str_methods():