From ee2b58befbbc3e2b3fc0a8e26bb30b64583dbc45 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Nov 2024 14:05:44 +0100 Subject: [PATCH 1/2] TST (string dtype): resolve xfail in arrow interface tests --- pandas/tests/frame/test_arrow_interface.py | 28 +++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/test_arrow_interface.py b/pandas/tests/frame/test_arrow_interface.py index dc163268f64b9..0d337396ea231 100644 --- a/pandas/tests/frame/test_arrow_interface.py +++ b/pandas/tests/frame/test_arrow_interface.py @@ -2,8 +2,6 @@ import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td import pandas as pd @@ -11,9 +9,8 @@ pa = pytest.importorskip("pyarrow") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @td.skip_if_no("pyarrow", min_version="14.0") -def test_dataframe_arrow_interface(): +def test_dataframe_arrow_interface(using_infer_string): df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) capsule = df.__arrow_c_stream__() @@ -25,7 +22,15 @@ def test_dataframe_arrow_interface(): ) table = pa.table(df) - expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + expected = pa.table( + { + "a": [1, 2, 3], + "b": pa.array( + ["a", "b", "c"], + pa.large_string() if using_infer_string else pa.string(), + ), + } + ) assert table.equals(expected) schema = pa.schema([("a", pa.int8()), ("b", pa.string())]) @@ -34,13 +39,20 @@ def test_dataframe_arrow_interface(): assert table.equals(expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @td.skip_if_no("pyarrow", min_version="15.0") -def test_dataframe_to_arrow(): +def test_dataframe_to_arrow(using_infer_string): df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) table = pa.RecordBatchReader.from_stream(df).read_all() - expected = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]}) + expected = pa.table( + { + "a": [1, 2, 3], + "b": pa.array( + ["a", "b", "c"], + pa.large_string() if using_infer_string else pa.string(), + ), + } + ) assert table.equals(expected) schema = pa.schema([("a", pa.int8()), ("b", pa.string())]) From feb183c92c4fdb617b815c7fdcd4723d616462ed Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Nov 2024 14:07:20 +0100 Subject: [PATCH 2/2] reformat --- pandas/tests/frame/test_arrow_interface.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/test_arrow_interface.py b/pandas/tests/frame/test_arrow_interface.py index 0d337396ea231..b36b6b5ffe0cc 100644 --- a/pandas/tests/frame/test_arrow_interface.py +++ b/pandas/tests/frame/test_arrow_interface.py @@ -22,15 +22,8 @@ def test_dataframe_arrow_interface(using_infer_string): ) table = pa.table(df) - expected = pa.table( - { - "a": [1, 2, 3], - "b": pa.array( - ["a", "b", "c"], - pa.large_string() if using_infer_string else pa.string(), - ), - } - ) + string_type = pa.large_string() if using_infer_string else pa.string() + expected = pa.table({"a": [1, 2, 3], "b": pa.array(["a", "b", "c"], string_type)}) assert table.equals(expected) schema = pa.schema([("a", pa.int8()), ("b", pa.string())]) @@ -44,15 +37,8 @@ def test_dataframe_to_arrow(using_infer_string): df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}) table = pa.RecordBatchReader.from_stream(df).read_all() - expected = pa.table( - { - "a": [1, 2, 3], - "b": pa.array( - ["a", "b", "c"], - pa.large_string() if using_infer_string else pa.string(), - ), - } - ) + string_type = pa.large_string() if using_infer_string else pa.string() + expected = pa.table({"a": [1, 2, 3], "b": pa.array(["a", "b", "c"], string_type)}) assert table.equals(expected) schema = pa.schema([("a", pa.int8()), ("b", pa.string())])