pandas-dev · jreback · Sep 3, 2021 · Aug 30, 2021 · Sep 2, 2021
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -1090,7 +1090,9 @@ def _isindex(colspec):
                     colspec = orig_names[colspec]
                 if _isindex(colspec):
                     continue
-                data_dict[colspec] = converter(data_dict[colspec])
+                # Pyarrow engine returns Series which we need to convert to
+                # numpy array before converter, its a no-op for other parsers
+                data_dict[colspec] = converter(np.asarray(data_dict[colspec]))
             else:
                 new_name, col, old_names = _try_convert_dates(
                     converter, colspec, data_dict, orig_names
@@ -1139,7 +1141,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns):
             colnames.append(c)
 
     new_name = "_".join([str(x) for x in colnames])
-    to_parse = [data_dict[c] for c in colnames if c in data_dict]
+    to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict]
 
     new_col = parser(*to_parse)
     return new_name, new_col, colnames

diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -42,7 +42,7 @@
 import pandas.io.date_converters as conv
 from pandas.io.parsers import read_csv
 
-pytestmark = pytest.mark.usefixtures("pyarrow_skip")
+xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 # constant
 _DEFAULT_DATETIME = datetime(1, 1, 1)
@@ -54,6 +54,7 @@
     date_strategy = st.datetimes()
 
 
+@xfail_pyarrow
 def test_read_csv_with_custom_date_parser(all_parsers):
     # GH36111
     def __custom_date_parser(time):
@@ -91,6 +92,7 @@ def __custom_date_parser(time):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_separator_date_conflict(all_parsers):
     # Regression test for gh-4678
     #
@@ -112,6 +114,7 @@ def test_separator_date_conflict(all_parsers):
     tm.assert_frame_equal(df, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize("keep_date_col", [True, False])
 def test_multiple_date_col_custom(all_parsers, keep_date_col):
     data = """\
@@ -267,6 +270,7 @@ def test_concat_date_col_fail(container, dim):
         parsing.concat_date_cols(date_cols)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize("keep_date_col", [True, False])
 def test_multiple_date_col(all_parsers, keep_date_col):
     data = """\
@@ -426,6 +430,7 @@ def test_date_col_as_index_col(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -490,6 +495,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_multiple_date_col_timestamp_parse(all_parsers):
     parser = all_parsers
     data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
@@ -524,6 +530,7 @@ def test_multiple_date_col_timestamp_parse(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_multiple_date_cols_with_header(all_parsers):
     parser = all_parsers
     data = """\
@@ -693,6 +700,7 @@ def test_date_parser_int_bug(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_nat_parse(all_parsers):
     # see gh-3062
     parser = all_parsers
@@ -708,6 +716,7 @@ def test_nat_parse(all_parsers):
         tm.assert_frame_equal(result, df)
 
 
+@xfail_pyarrow
 def test_csv_custom_parser(all_parsers):
     data = """A,B,C
 20090101,a,1,2
@@ -722,6 +731,7 @@ def test_csv_custom_parser(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_parse_dates_implicit_first_col(all_parsers):
     data = """A,B,C
 20090101,a,1,2
@@ -735,6 +745,7 @@ def test_parse_dates_implicit_first_col(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_parse_dates_string(all_parsers):
     data = """date,A,B,C
 20090101,a,1,2
@@ -779,6 +790,7 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]])
 def test_parse_dates_column_list(all_parsers, parse_dates):
     data = "a,b,c\n01/01/2010,1,15/02/2010"
@@ -795,6 +807,7 @@ def test_parse_dates_column_list(all_parsers, parse_dates):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
 def test_multi_index_parse_dates(all_parsers, index_col):
     data = """index1,index2,A,B,C
@@ -840,6 +853,7 @@ def test_multi_index_parse_dates(all_parsers, index_col):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize("kwargs", [{"dayfirst": True}, {"day_first": True}])
 def test_parse_dates_custom_euro_format(all_parsers, kwargs):
     parser = all_parsers
@@ -884,6 +898,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
             )
 
 
+@xfail_pyarrow
 def test_parse_tz_aware(all_parsers):
     # See gh-1693
     parser = all_parsers
@@ -897,6 +912,7 @@ def test_parse_tz_aware(all_parsers):
     assert result.index.tz is pytz.utc
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "parse_dates,index_col",
     [({"nominal": [1, 2]}, "nominal"), ({"nominal": [1, 2]}, 0), ([[1, 2]], 0)],
@@ -997,6 +1013,7 @@ def test_multiple_date_cols_index(all_parsers, parse_dates, index_col):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_multiple_date_cols_chunked(all_parsers):
     parser = all_parsers
     data = """\
@@ -1089,6 +1106,7 @@ def test_multiple_date_cols_chunked(all_parsers):
     tm.assert_frame_equal(chunks[2], expected[4:])
 
 
+@xfail_pyarrow
 def test_multiple_date_col_named_index_compat(all_parsers):
     parser = all_parsers
     data = """\
@@ -1112,6 +1130,7 @@ def test_multiple_date_col_named_index_compat(all_parsers):
     tm.assert_frame_equal(with_indices, with_names)
 
 
+@xfail_pyarrow
 def test_multiple_date_col_multiple_index_compat(all_parsers):
     parser = all_parsers
     data = """\
@@ -1179,6 +1198,7 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
     )
 
 
+@xfail_pyarrow
 def test_parse_dates_empty_string(all_parsers):
     # see gh-2263
     parser = all_parsers
@@ -1191,6 +1211,7 @@ def test_parse_dates_empty_string(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "data,kwargs,expected",
     [
@@ -1230,6 +1251,7 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -1258,6 +1280,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
@@ -1346,6 +1369,7 @@ def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warni
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     ([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]),
@@ -1368,6 +1392,7 @@ def test_parse_date_fields(all_parsers, date_parser, warning):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     (
@@ -1399,6 +1424,7 @@ def test_parse_date_all_fields(all_parsers, date_parser, warning):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_parser, warning",
     (
@@ -1430,6 +1456,7 @@ def test_datetime_fractional_seconds(all_parsers, date_parser, warning):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_generic(all_parsers):
     parser = all_parsers
     data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
@@ -1448,6 +1475,7 @@ def test_generic(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_date_parser_resolution_if_not_ns(all_parsers):
     # see gh-10245
     parser = all_parsers
@@ -1545,6 +1573,7 @@ def test_parse_timezone(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_string",
     ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"],
@@ -1556,6 +1585,7 @@ def test_invalid_parse_delimited_date(all_parsers, date_string):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_string,dayfirst,expected",
     [
@@ -1578,6 +1608,7 @@ def test_parse_delimited_date_swap_no_warning(
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "date_string,dayfirst,expected",
     [
@@ -1647,6 +1678,7 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
     assert result == expected
 
 
+@xfail_pyarrow
 @pytest.mark.parametrize(
     "names, usecols, parse_dates, missing_cols",
     [
@@ -1679,6 +1711,7 @@ def test_missing_parse_dates_column_raises(
         )
 
 
+@xfail_pyarrow
 def test_date_parser_and_names(all_parsers):
     # GH#33699
     parser = all_parsers
@@ -1688,6 +1721,7 @@ def test_date_parser_and_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow
 def test_date_parser_usecols_thousands(all_parsers):
     # GH#39365
     data = """A,B,C