diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6737f2dfd5086..30ee47862502e 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1090,7 +1090,9 @@ def _isindex(colspec): colspec = orig_names[colspec] if _isindex(colspec): continue - data_dict[colspec] = converter(data_dict[colspec]) + # Pyarrow engine returns Series which we need to convert to + # numpy array before converter, it's a no-op for other parsers + data_dict[colspec] = converter(np.asarray(data_dict[colspec])) else: new_name, col, old_names = _try_convert_dates( converter, colspec, data_dict, orig_names @@ -1139,7 +1141,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns): colnames.append(c) new_name = "_".join([str(x) for x in colnames]) - to_parse = [data_dict[c] for c in colnames if c in data_dict] + to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict] new_col = parser(*to_parse) return new_name, new_col, colnames diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 5fbd1db9df96d..6ef6035b96fbe 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -42,7 +42,7 @@ import pandas.io.date_converters as conv from pandas.io.parsers import read_csv -pytestmark = pytest.mark.usefixtures("pyarrow_skip") +xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") # constant _DEFAULT_DATETIME = datetime(1, 1, 1) @@ -54,6 +54,7 @@ date_strategy = st.datetimes() +@xfail_pyarrow def test_read_csv_with_custom_date_parser(all_parsers): # GH36111 def __custom_date_parser(time): @@ -91,6 +92,7 @@ def __custom_date_parser(time): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_separator_date_conflict(all_parsers): # Regression test for gh-4678 # @@ -112,6 +114,7 @@ def test_separator_date_conflict(all_parsers): tm.assert_frame_equal(df, expected) +@xfail_pyarrow 
@pytest.mark.parametrize("keep_date_col", [True, False]) def test_multiple_date_col_custom(all_parsers, keep_date_col): data = """\ @@ -267,6 +270,7 @@ def test_concat_date_col_fail(container, dim): parsing.concat_date_cols(date_cols) +@xfail_pyarrow @pytest.mark.parametrize("keep_date_col", [True, False]) def test_multiple_date_col(all_parsers, keep_date_col): data = """\ @@ -426,6 +430,7 @@ def test_date_col_as_index_col(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), @@ -490,6 +495,7 @@ def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_multiple_date_col_timestamp_parse(all_parsers): parser = all_parsers data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 @@ -524,6 +530,7 @@ def test_multiple_date_col_timestamp_parse(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_multiple_date_cols_with_header(all_parsers): parser = all_parsers data = """\ @@ -693,6 +700,7 @@ def test_date_parser_int_bug(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_nat_parse(all_parsers): # see gh-3062 parser = all_parsers @@ -708,6 +716,7 @@ def test_nat_parse(all_parsers): tm.assert_frame_equal(result, df) +@xfail_pyarrow def test_csv_custom_parser(all_parsers): data = """A,B,C 20090101,a,1,2 @@ -722,6 +731,7 @@ def test_csv_custom_parser(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_parse_dates_implicit_first_col(all_parsers): data = """A,B,C 20090101,a,1,2 @@ -735,6 +745,7 @@ def test_parse_dates_implicit_first_col(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_parse_dates_string(all_parsers): data = """date,A,B,C 20090101,a,1,2 @@ -779,6 +790,7 @@ def test_yy_format_with_year_first(all_parsers, parse_dates): tm.assert_frame_equal(result, 
expected) +@xfail_pyarrow @pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]]) def test_parse_dates_column_list(all_parsers, parse_dates): data = "a,b,c\n01/01/2010,1,15/02/2010" @@ -795,6 +807,7 @@ def test_parse_dates_column_list(all_parsers, parse_dates): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) def test_multi_index_parse_dates(all_parsers, index_col): data = """index1,index2,A,B,C @@ -840,6 +853,7 @@ def test_multi_index_parse_dates(all_parsers, index_col): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize("kwargs", [{"dayfirst": True}, {"day_first": True}]) def test_parse_dates_custom_euro_format(all_parsers, kwargs): parser = all_parsers @@ -884,6 +898,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): ) +@xfail_pyarrow def test_parse_tz_aware(all_parsers): # See gh-1693 parser = all_parsers @@ -897,6 +912,7 @@ def test_parse_tz_aware(all_parsers): assert result.index.tz is pytz.utc +@xfail_pyarrow @pytest.mark.parametrize( "parse_dates,index_col", [({"nominal": [1, 2]}, "nominal"), ({"nominal": [1, 2]}, 0), ([[1, 2]], 0)], @@ -997,6 +1013,7 @@ def test_multiple_date_cols_index(all_parsers, parse_dates, index_col): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_multiple_date_cols_chunked(all_parsers): parser = all_parsers data = """\ @@ -1089,6 +1106,7 @@ def test_multiple_date_cols_chunked(all_parsers): tm.assert_frame_equal(chunks[2], expected[4:]) +@xfail_pyarrow def test_multiple_date_col_named_index_compat(all_parsers): parser = all_parsers data = """\ @@ -1112,6 +1130,7 @@ def test_multiple_date_col_named_index_compat(all_parsers): tm.assert_frame_equal(with_indices, with_names) +@xfail_pyarrow def test_multiple_date_col_multiple_index_compat(all_parsers): parser = all_parsers data = """\ @@ -1179,6 +1198,7 @@ def test_bad_date_parse(all_parsers, cache_dates, value): ) +@xfail_pyarrow def 
test_parse_dates_empty_string(all_parsers): # see gh-2263 parser = all_parsers @@ -1191,6 +1211,7 @@ def test_parse_dates_empty_string(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "data,kwargs,expected", [ @@ -1230,6 +1251,7 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), @@ -1258,6 +1280,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), @@ -1346,6 +1369,7 @@ def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warni tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]), @@ -1368,6 +1392,7 @@ def test_parse_date_fields(all_parsers, date_parser, warning): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ( @@ -1399,6 +1424,7 @@ def test_parse_date_all_fields(all_parsers, date_parser, warning): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_parser, warning", ( @@ -1430,6 +1456,7 @@ def test_datetime_fractional_seconds(all_parsers, date_parser, warning): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_generic(all_parsers): parser = all_parsers data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." 
@@ -1448,6 +1475,7 @@ def test_generic(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_date_parser_resolution_if_not_ns(all_parsers): # see gh-10245 parser = all_parsers @@ -1545,6 +1573,7 @@ def test_parse_timezone(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_string", ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], @@ -1556,6 +1585,7 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_string,dayfirst,expected", [ @@ -1578,6 +1608,7 @@ def test_parse_delimited_date_swap_no_warning( tm.assert_frame_equal(result, expected) +@xfail_pyarrow @pytest.mark.parametrize( "date_string,dayfirst,expected", [ @@ -1647,6 +1678,7 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti assert result == expected +@xfail_pyarrow @pytest.mark.parametrize( "names, usecols, parse_dates, missing_cols", [ @@ -1679,6 +1711,7 @@ def test_missing_parse_dates_column_raises( ) +@xfail_pyarrow def test_date_parser_and_names(all_parsers): # GH#33699 parser = all_parsers @@ -1688,6 +1721,7 @@ def test_date_parser_and_names(all_parsers): tm.assert_frame_equal(result, expected) +@xfail_pyarrow def test_date_parser_usecols_thousands(all_parsers): # GH#39365 data = """A,B,C