From f2d9eb93670e83e4a96019204ff7fa4d1bb0003a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 10:43:07 +0000 Subject: [PATCH 01/16] wip --- doc/source/user_guide/io.rst | 10 ++ pandas/io/excel/_base.py | 10 ++ pandas/io/parsers/base_parser.py | 5 + pandas/io/parsers/readers.py | 43 ++++++++- pandas/tests/io/parser/test_parse_dates.py | 105 +++++++++++++++++---- 5 files changed, 148 insertions(+), 25 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index dc21b9f35d272..fedfeb9187022 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -290,6 +290,16 @@ date_parser : function, default ``None`` values from the columns defined by parse_dates into a single array and pass that; and 3) call date_parser once for each row using one or more strings (corresponding to the columns defined by parse_dates) as arguments. + + .. deprecated:: 2.0.0 + Use ``date_format`` instead, or read in as ``object`` and then apply + :func:`to_datetime` as-needed. +date_format : str, default ``None`` + If used in conjunction with ``parse_dates``, will parse dates according to this + format. For anything more complex (e.g. different formats for different columns), + please read in as ``object`` and then apply :func:`to_datetime` as-needed. + + .. versionadded:: 2.0.0 dayfirst : boolean, default ``False`` DD/MM format dates, international and European format. cache_dates : boolean, default True diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6fb6d72bab099..7852d5f2171ab 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -250,6 +250,16 @@ and pass that; and 3) call `date_parser` once for each row using one or more strings (corresponding to the columns defined by `parse_dates`) as arguments. + + .. deprecated:: 2.0.0 + Use ``date_format`` instead, or read in as ``object`` and then apply + :func:`to_datetime` as-needed. 
+date_format : str, default ``None`` + If used in conjunction with ``parse_dates``, will parse dates according to this + format. For anything more complex (e.g. different formats for different columns), + please read in as ``object`` and then apply :func:`to_datetime` as-needed. + + .. versionadded:: 2.0.0 thousands : str, default None Thousands separator for parsing string columns to numeric. Note that this parameter is only necessary for columns stored as TEXT in Excel, diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6272f213ccef1..543b10c3140c7 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -116,6 +116,7 @@ def __init__(self, kwds) -> None: self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) self._parse_date_cols: Iterable = [] self.date_parser = kwds.pop("date_parser", None) + self.date_format = kwds.pop("date_format", None) self.dayfirst = kwds.pop("dayfirst", False) self.keep_date_col = kwds.pop("keep_date_col", False) @@ -134,6 +135,7 @@ def __init__(self, kwds) -> None: self._date_conv = _make_date_converter( date_parser=self.date_parser, + date_format=self.date_format, dayfirst=self.dayfirst, cache_dates=self.cache_dates, ) @@ -1092,6 +1094,7 @@ def _make_date_converter( date_parser=None, dayfirst: bool = False, cache_dates: bool = True, + date_format=None, ): def converter(*date_cols): if date_parser is None: @@ -1099,6 +1102,7 @@ def converter(*date_cols): return tools.to_datetime( ensure_object(strs), + format=date_format, utc=False, dayfirst=dayfirst, errors="ignore", @@ -1153,6 +1157,7 @@ def converter(*date_cols): "keep_date_col": False, "dayfirst": False, "date_parser": None, + "date_format": None, "usecols": None, # 'iterator': False, "chunksize": None, diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 410b4fc0bf9c0..6eff217a6c71f 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -239,10 
+239,7 @@ say because of an unparsable value or a mixture of timezones, the column or index will be returned unaltered as an object data type. For non-standard datetime parsing, use ``pd.to_datetime`` after - ``pd.read_csv``. To parse an index or column with a mixture of timezones, - specify ``date_parser`` to be a partially-applied - :func:`pandas.to_datetime` with ``utc=True``. See - :ref:`io.csv.mixed_timezones` for more. + ``pd.read_csv``. Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : bool, default False @@ -267,6 +264,16 @@ and pass that; and 3) call `date_parser` once for each row using one or more strings (corresponding to the columns defined by `parse_dates`) as arguments. + + .. deprecated:: 2.0.0 + Use ``date_format`` instead, or read in as ``object`` and then apply + :func:`to_datetime` as-needed. +date_format : str, default ``None`` + If used in conjunction with ``parse_dates``, will parse dates according to this + format. For anything more complex (e.g. different formats for different columns), + please read in as ``object`` and then apply :func:`to_datetime` as-needed. + + .. versionadded:: 2.0.0 dayfirst : bool, default False DD/MM format dates, international and European format. 
cache_dates : bool, default True @@ -546,7 +553,7 @@ def _read( # if we pass a date_parser and parse_dates=False, we should not parse the # dates GH#44366 if kwds.get("parse_dates", None) is None: - if kwds.get("date_parser", None) is None: + if kwds.get("date_parser", None) is None and kwds.get("date_format") is None: kwds["parse_dates"] = False else: kwds["parse_dates"] = True @@ -620,6 +627,7 @@ def read_csv( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: Literal[True], @@ -676,6 +684,7 @@ def read_csv( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: bool = ..., @@ -732,6 +741,7 @@ def read_csv( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: Literal[False] = ..., @@ -788,6 +798,7 @@ def read_csv( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: bool = ..., @@ -856,6 +867,7 @@ def read_csv( infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, date_parser=None, + date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, # Iteration @@ -943,6 +955,7 @@ def read_table( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: Literal[True], @@ -999,6 +1012,7 @@ def read_table( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool 
= ..., iterator: bool = ..., @@ -1055,6 +1069,7 @@ def read_table( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: Literal[False] = ..., @@ -1111,6 +1126,7 @@ def read_table( infer_datetime_format: bool | lib.NoDefault = ..., keep_date_col: bool = ..., date_parser=..., + date_format: str | None = ..., dayfirst: bool = ..., cache_dates: bool = ..., iterator: bool = ..., @@ -1179,6 +1195,7 @@ def read_table( infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, date_parser=None, + date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, # Iteration @@ -1207,6 +1224,17 @@ def read_table( storage_options: StorageOptions = None, use_nullable_dtypes: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | TextFileReader: + if date_parser is not None: + warnings.warn( + "The argument 'date_parser' is deprecated and will " + "be removed in a future version. " + "Please use 'date_format' instead, or read your data in as 'object' dtype " + "and then call 'to_datetime'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if date_parser is not None and date_format is not None: + raise TypeError("Cannot use both 'date_parser' and 'date_format'") # locals() should never be modified kwds = locals().copy() del kwds["filepath_or_buffer"] @@ -1762,6 +1790,11 @@ def TextParser(*args, **kwds) -> TextFileReader: parse_dates : bool, default False keep_date_col : bool, default False date_parser : function, optional + + .. deprecated:: 2.0.0 + date_format : str, default ``None`` + + .. 
versionadded:: 2.0.0 skiprows : list of integers Row numbers to skip skipfooter : int diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index fc477a899d089..1a26fd06de8ad 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -63,7 +63,9 @@ def __custom_date_parser(time): 41051.00 -98573.7302 871458.0640 389.0086 """ ) - result = all_parsers.read_csv( + result = all_parsers.read_csv_check_warnings( + FutureWarning, + "Please use 'date_format' instead", testdata, delim_whitespace=True, parse_dates=True, @@ -101,7 +103,9 @@ def __custom_date_parser(time): 41051.00 -97.72 """ ) - result = all_parsers.read_csv( + result = all_parsers.read_csv_check_warnings( + FutureWarning, + "Please use 'date_format' instead", testdata, delim_whitespace=True, parse_dates=False, @@ -176,7 +180,12 @@ def date_parser(*date_cols): "keep_date_col": keep_date_col, "names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"], } - result = parser.read_csv(StringIO(data), **kwds) + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", + StringIO(data), + **kwds, + ) expected = DataFrame( [ @@ -482,7 +491,9 @@ def test_multiple_date_cols_int_cast(all_parsers): "parse_dates": parse_dates, "date_parser": pd.to_datetime, } - result = parser.read_csv(StringIO(data), **kwds) + result = parser.read_csv_check_warnings( + FutureWarning, "use 'date_format' instead", StringIO(data), **kwds + ) expected = DataFrame( [ @@ -529,8 +540,13 @@ def test_multiple_date_col_timestamp_parse(all_parsers): data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" - result = parser.read_csv( - StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", + StringIO(data), + parse_dates=[[0, 1]], + header=None, + 
date_parser=Timestamp, ) expected = DataFrame( [ @@ -686,7 +702,9 @@ def test_date_parser_int_bug(all_parsers): "12345,1,-1,3,invoice_InvoiceResource,search\n" ) - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), index_col=0, parse_dates=[0], @@ -752,8 +770,11 @@ def test_csv_custom_parser(all_parsers): 20090103,c,4,5 """ parser = all_parsers - result = parser.read_csv( - StringIO(data), date_parser=lambda x: datetime.strptime(x, "%Y%m%d") + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", + StringIO(data), + date_parser=lambda x: datetime.strptime(x, "%Y%m%d"), ) expected = parser.read_csv(StringIO(data), parse_dates=True) tm.assert_frame_equal(result, expected) @@ -903,7 +924,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): 02/02/2010,1,2 """ if "dayfirst" in kwargs: - df = parser.read_csv( + df = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), names=["time", "Q", "NTU"], date_parser=lambda d: du_parse(d, **kwargs), @@ -925,7 +948,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): else: msg = "got an unexpected keyword argument 'day_first'" with pytest.raises(TypeError, match=msg): - parser.read_csv( + parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), names=["time", "Q", "NTU"], date_parser=lambda d: du_parse(d, **kwargs), @@ -1295,7 +1320,7 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers): parser = all_parsers data = "Date,test\n2012-01-01,1\n,2" parser.read_csv_check_warnings( - UserWarning, + FutureWarning, "The argument 'infer_datetime_format' is deprecated", StringIO(data), parse_dates=["Date"], @@ -1303,6 +1328,26 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers): ) +@pytest.mark.parametrize( + "reader", ["read_csv_check_warnings", "read_table_check_warnings"] +) +def 
test_parse_dates_date_parser_and_date_format(all_parsers, reader): + # GH ??? + parser = all_parsers + data = "Date,test\n2012-01-01,1\n,2" + msg = "Cannot use both 'date_parser' and 'date_format'" + with pytest.raises(TypeError, match=msg): + getattr(parser, reader)( + FutureWarning, + "use 'date_format' instead", + StringIO(data), + parse_dates=["Date"], + date_parser=pd.to_datetime, + date_format="ISO8601", + sep=",", + ) + + @xfail_pyarrow @pytest.mark.parametrize( "data,kwargs,expected", @@ -1353,7 +1398,9 @@ def test_parse_date_time_multi_level_column_name(all_parsers): 2001-01-06, 00:00:00, 1.0, 11. """ parser = all_parsers - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), header=[0, 1], parse_dates={"date_time": [0, 1]}, @@ -1443,7 +1490,13 @@ def test_parse_date_time_multi_level_column_name(all_parsers): ) def test_parse_date_time(all_parsers, data, kwargs, expected): parser = all_parsers - result = parser.read_csv(StringIO(data), date_parser=pd.to_datetime, **kwargs) + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", + StringIO(data), + date_parser=pd.to_datetime, + **kwargs, + ) # Python can sometimes be flaky about how # the aggregated columns are entered, so @@ -1458,7 +1511,9 @@ def test_parse_date_time(all_parsers, data, kwargs, expected): def test_parse_date_fields(all_parsers): parser = all_parsers data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), header=0, parse_dates={"ymd": [0, 1, 2]}, @@ -1480,7 +1535,9 @@ def test_parse_date_all_fields(all_parsers): 2001,01,05,10,00,0,0.0,10. 2001,01,5,10,0,00,1.,11. 
""" - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), header=0, date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), @@ -1504,7 +1561,9 @@ def test_datetime_fractional_seconds(all_parsers): 2001,01,05,10,00,0.123456,0.0,10. 2001,01,5,10,0,0.500000,1.,11. """ - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), header=0, date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), @@ -1528,7 +1587,9 @@ def test_generic(all_parsers): def parse_function(yy, mm): return [date(year=int(y), month=int(m), day=1) for y, m in zip(yy, mm)] - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), header=0, parse_dates={"ym": [0, 1]}, @@ -1561,7 +1622,9 @@ def date_parser(dt, time): arr = [datetime.combine(d, t) for d, t in zip(dt, time)] return np.array(arr, dtype="datetime64[s]") - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), date_parser=date_parser, parse_dates={"datetime": ["date", "time"]}, @@ -1997,7 +2060,9 @@ def test_replace_nans_before_parsing_dates(all_parsers): # 2017-09-09 """ - result = parser.read_csv( + result = parser.read_csv_check_warnings( + FutureWarning, + "use 'date_format' instead", StringIO(data), na_values={"Test": ["#", "0"]}, parse_dates=["Test"], From ab535301623f51188f5fe38b432a568cd2550027 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 10:57:13 +0000 Subject: [PATCH 02/16] fixup --- pandas/io/parsers/readers.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 6eff217a6c71f..1e7ec3d1b4afd 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -903,8 +903,20 @@ def 
read_csv( "A strict version of it is now the default, see " "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. " "You can safely remove this argument.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if date_parser is not None: + warnings.warn( + "The argument 'date_parser' is deprecated and will " + "be removed in a future version. " + "Please use 'date_format' instead, or read your data in as 'object' dtype " + "and then call 'to_datetime'.", + FutureWarning, stacklevel=find_stack_level(), ) + if date_parser is not None and date_format is not None: + raise TypeError("Cannot use both 'date_parser' and 'date_format'") # locals() should never be modified kwds = locals().copy() del kwds["filepath_or_buffer"] From c855c4b44d3cb2122dc992b7713e1481e2ff6fe0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 11:01:58 +0000 Subject: [PATCH 03/16] update user guide --- doc/source/user_guide/io.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index fedfeb9187022..b9375af54027a 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -929,12 +929,12 @@ an exception is raised, the next one is tried: Note that performance-wise, you should try these methods of parsing dates in order: -1. If you know the format, use ``pd.to_datetime()``: - ``date_parser=lambda x: pd.to_datetime(x, format=...)``. +1. If you know the format, use ``date_format``, e.g.: + ``date_format="%d/%m/%Y"``. -2. If you have a really non-standard format, use a custom ``date_parser`` function. - For optimal performance, this should be vectorized, i.e., it should accept arrays - as arguments. +2. If you have different formats for different columns, or want to pass any extra options (such + as ``utc``) to ``to_datetime``, then you should read in your data as ``object`` dtype, and + then use ``to_datetime``. ..
ipython:: python From 389dd718939a0ec933c71d6369878f5ab239ed04 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 11:02:59 +0000 Subject: [PATCH 04/16] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c1d9b2744b27e..59ec643ab5fd0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -639,6 +639,7 @@ Deprecations - :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`) - :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`) - :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_intterval_dtype` instead (:issue:`50042`) +- Deprecated argument ``date_parser`` in :func:`read_csv` and :func:`read_table` in favour of ``date_format`` (:issue:`50601`) - .. --------------------------------------------------------------------------- From 2e6fbcbdd175f673a16ab402f96763229cee11f1 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 27 Jan 2023 11:50:20 +0000 Subject: [PATCH 05/16] gh number --- pandas/tests/io/parser/test_parse_dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a26fd06de8ad..1ab1baaaad1db 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1332,7 +1332,7 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers): "reader", ["read_csv_check_warnings", "read_table_check_warnings"] ) def test_parse_dates_date_parser_and_date_format(all_parsers, reader): - # GH ??? 
+ # GH 50601 parser = all_parsers data = "Date,test\n2012-01-01,1\n,2" msg = "Cannot use both 'date_parser' and 'date_format'" From fd5e7c67750fed4bfb05aed77b33613c628ad3f7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 14:04:41 +0000 Subject: [PATCH 06/16] update user guide; --- doc/source/user_guide/io.rst | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b9375af54027a..ef1ed1decfac3 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -908,26 +908,8 @@ data columns: Date parsing functions ++++++++++++++++++++++ -Finally, the parser allows you to specify a custom ``date_parser`` function to -take full advantage of the flexibility of the date parsing API: - -.. ipython:: python - - df = pd.read_csv( - "tmp.csv", header=None, parse_dates=date_spec, date_parser=pd.to_datetime - ) - df - -pandas will try to call the ``date_parser`` function in three different ways. If -an exception is raised, the next one is tried: - -1. ``date_parser`` is first called with one or more arrays as arguments, - as defined using ``parse_dates`` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). - -2. If #1 fails, ``date_parser`` is called with all the columns - concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``). - -Note that performance-wise, you should try these methods of parsing dates in order: +Finally, the parser allows you to specify a custom ``date_format``. +Performance-wise, you should try these methods of parsing dates in order: 1. If you know the format, use ``date_format``, e.g.: ``date_format="%d/%m/%Y"``. @@ -962,16 +944,13 @@ an object-dtype column with strings, even with ``parse_dates``. 
df = pd.read_csv(StringIO(content), parse_dates=["a"]) df["a"] -To parse the mixed-timezone values as a datetime column, pass a partially-applied -:func:`to_datetime` with ``utc=True`` as the ``date_parser``. +To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and +then call :func:`to_datetime` with ``utc=True``. .. ipython:: python - df = pd.read_csv( - StringIO(content), - parse_dates=["a"], - date_parser=lambda col: pd.to_datetime(col, utc=True), - ) + df = pd.read_csv(StringIO(content)) + df["a"] = pd.to_datetime(df["a"], utc=True) df["a"] From 9594c04eb71f83c43c84d2d7b95f608b2716e81f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 27 Jan 2023 14:17:05 +0000 Subject: [PATCH 07/16] whatsnew --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 06356c8b02e84..0b0b31262e7af 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -687,6 +687,7 @@ As can be seen, the ``dtype`` is object; each value in the column is a string. To convert the strings to an array of datetimes, the ``date_parser`` argument .. 
ipython:: python + :okwarning: df = pd.read_csv(io.StringIO(content), parse_dates=['a'], date_parser=lambda col: pd.to_datetime(col, utc=True)) From d9f35f95bbb2fd205965a75d88b41f041d9aff10 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 30 Jan 2023 14:33:38 +0000 Subject: [PATCH 08/16] update whatsnew note with date_format enhancement --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 59ec643ab5fd0..ddbca335e7633 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -187,6 +187,7 @@ Other enhancements - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`) - Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`) +- :func:`read_csv` and :func:`read_table` now accept ``date_format`` (:issue:`50601`) - .. --------------------------------------------------------------------------- From 5654f91942ed208318930a2f7973d8e73045cb1e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 08:59:46 +0000 Subject: [PATCH 09/16] make example ipython code-block --- doc/source/whatsnew/v0.24.0.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 0b0b31262e7af..8b850c7da37f3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -686,12 +686,19 @@ Parsing mixed-timezones with :func:`read_csv` As can be seen, the ``dtype`` is object; each value in the column is a string. 
To convert the strings to an array of datetimes, the ``date_parser`` argument -.. ipython:: python - :okwarning: +.. code-block:: ipython - df = pd.read_csv(io.StringIO(content), parse_dates=['a'], - date_parser=lambda col: pd.to_datetime(col, utc=True)) - df.a + In [3]: df = pd.read_csv( + ...: io.StringIO(content), + ...: parse_dates=['a'], + ...: date_parser=lambda col: pd.to_datetime(col, utc=True), + ...: ) + + In [4]: df.a + Out[4]: + 0 1999-12-31 19:00:00+00:00 + 1 1999-12-31 18:00:00+00:00 + Name: a, dtype: datetime64[ns, UTC] See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more. From f627ecedca1f28b9ea39c9c2dc7023c43ebcd71b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 15:12:02 +0000 Subject: [PATCH 10/16] add tests for date_format --- pandas/tests/io/parser/test_parse_dates.py | 47 +++++++++++++++++----- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index bc48adcfd8fe5..ec19760c980d1 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1533,7 +1533,18 @@ def test_parse_date_fields(all_parsers): @xfail_pyarrow -def test_parse_date_all_fields(all_parsers): +@pytest.mark.parametrize( + ("key", "value", "warn"), + [ + ( + "date_parser", + lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), + FutureWarning, + ), + ("date_format", "%Y %m %d %H %M %S", None), + ], +) +def test_parse_date_all_fields(all_parsers, key, value, warn): parser = all_parsers data = """\ year,month,day,hour,minute,second,a,b @@ -1541,12 +1552,12 @@ def test_parse_date_all_fields(all_parsers): 2001,01,5,10,0,00,1.,11. 
""" result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, - date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + **{key: value}, ) expected = DataFrame( [ @@ -1559,7 +1570,18 @@ def test_parse_date_all_fields(all_parsers): @xfail_pyarrow -def test_datetime_fractional_seconds(all_parsers): +@pytest.mark.parametrize( + ("key", "value", "warn"), + [ + ( + "date_parser", + lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), + FutureWarning, + ), + ("date_format", "%Y %m %d %H %M %S.%f", None), + ], +) +def test_datetime_fractional_seconds(all_parsers, key, value, warn): parser = all_parsers data = """\ year,month,day,hour,minute,second,a,b @@ -1567,12 +1589,12 @@ def test_datetime_fractional_seconds(all_parsers): 2001,01,5,10,0,0.500000,1.,11. """ result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), header=0, - date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + **{key: value}, ) expected = DataFrame( [ @@ -2055,7 +2077,14 @@ def test_infer_first_column_as_index(all_parsers): @skip_pyarrow -def test_replace_nans_before_parsing_dates(all_parsers): +@pytest.mark.parametrize( + ("key", "value", "warn"), + [ + ("date_parser", lambda x: pd.to_datetime(x, format="%Y-%m-%d"), FutureWarning), + ("date_format", "%Y-%m-%d", None), + ], +) +def test_replace_nans_before_parsing_dates(all_parsers, key, value, warn): # GH#26203 parser = all_parsers data = """Test @@ -2066,12 +2095,12 @@ def test_replace_nans_before_parsing_dates(all_parsers): 2017-09-09 """ result = parser.read_csv_check_warnings( - FutureWarning, + warn, "use 'date_format' instead", StringIO(data), na_values={"Test": ["#", "0"]}, parse_dates=["Test"], - date_parser=lambda x: pd.to_datetime(x, format="%Y-%m-%d"), + **{key: value}, ) expected = DataFrame( { 
From 5390aed91c42f9e93c2d9901decff3cb7eab0a2b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 3 Feb 2023 15:32:27 +0000 Subject: [PATCH 11/16] wip add this to read_excel too --- pandas/io/excel/_base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3b10c0ff8336d..5eb80f4d1d251 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -475,6 +475,7 @@ def read_excel( verbose: bool = False, parse_dates: list | dict | bool = False, date_parser: Callable | None = None, + date_format: str | None = None, thousands: str | None = None, decimal: str = ".", comment: str | None = None, @@ -519,6 +520,7 @@ def read_excel( verbose=verbose, parse_dates=parse_dates, date_parser=date_parser, + date_format=date_format, thousands=thousands, decimal=decimal, comment=comment, @@ -723,6 +725,7 @@ def parse( verbose: bool = False, parse_dates: list | dict | bool = False, date_parser: Callable | None = None, + date_format: str | None = None, thousands: str | None = None, decimal: str = ".", comment: str | None = None, @@ -883,6 +886,7 @@ def parse( skip_blank_lines=False, # GH 39808 parse_dates=parse_dates, date_parser=date_parser, + date_format=date_format, thousands=thousands, decimal=decimal, comment=comment, @@ -1550,6 +1554,7 @@ def parse( na_values=None, parse_dates: list | dict | bool = False, date_parser: Callable | None = None, + date_format: str | None = None, thousands: str | None = None, comment: str | None = None, skipfooter: int = 0, @@ -1582,6 +1587,7 @@ def parse( na_values=na_values, parse_dates=parse_dates, date_parser=date_parser, + date_format=date_format, thousands=thousands, comment=comment, skipfooter=skipfooter, From a7d496b6a197da7f4803afdb699c04681a6bc4a6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 14:25:27 +0000 Subject: [PATCH 12/16] validate within _parser --- doc/source/user_guide/io.rst | 2 +- pandas/io/excel/_base.py | 3 ++- 
pandas/io/parsers/base_parser.py | 12 +++++++++ pandas/io/parsers/readers.py | 22 ---------------- pandas/tests/io/excel/test_writers.py | 12 ++++++++- pandas/tests/io/parser/test_parse_dates.py | 2 ++ pandas/tests/io/parser/test_read_fwf.py | 29 +++++++++++++++------- 7 files changed, 48 insertions(+), 34 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e3c1120c3d7f6..627c40f26a662 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -810,7 +810,7 @@ Specifying date columns +++++++++++++++++++++++ To better facilitate working with datetime data, :func:`read_csv` -uses the keyword arguments ``parse_dates`` and ``date_parser`` +uses the keyword arguments ``parse_dates`` and ``date_format`` to allow users to specify a variety of columns and date/time formats to turn the input text data into ``datetime`` objects. diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 5eb80f4d1d251..d65e5c13af4b8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -397,6 +397,7 @@ def read_excel( verbose: bool = ..., parse_dates: list | dict | bool = ..., date_parser: Callable | None = ..., + date_format: str | None = ..., thousands: str | None = ..., decimal: str = ..., comment: str | None = ..., @@ -436,6 +437,7 @@ def read_excel( verbose: bool = ..., parse_dates: list | dict | bool = ..., date_parser: Callable | None = ..., + date_format: str | None = ..., thousands: str | None = ..., decimal: str = ..., comment: str | None = ..., @@ -483,7 +485,6 @@ def read_excel( storage_options: StorageOptions = None, use_nullable_dtypes: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | dict[IntStrT, DataFrame]: - should_close = False if not isinstance(io, ExcelFile): should_close = True diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 7422bbebf4f03..c9c54e7c7cace 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py 
@@ -1098,6 +1098,18 @@ def _make_date_converter( cache_dates: bool = True, date_format=None, ): + if date_parser is not None: + warnings.warn( + "The argument 'date_parser' is deprecated and will " + "be removed in a future version. " + "Please use 'date_format' instead, or read your data in as 'object' dtype " + "and then call 'to_datetime'.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if date_parser is not None and date_format is not None: + raise TypeError("Cannot use both 'date_parser' and 'date_format'") + def converter(*date_cols): if date_parser is None: strs = parsing.concat_date_cols(date_cols) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index f3ffcb29edf73..de5904b79c57d 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -908,17 +908,6 @@ def read_csv( FutureWarning, stacklevel=find_stack_level(), ) - if date_parser is not None: - warnings.warn( - "The argument 'date_parser' is deprecated and will " - "be removed in a future version. " - "Please use 'date_format' instead, or read your data in as 'object' dtype " - "and then call 'to_datetime'.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if date_parser is not None and date_format is not None: - raise TypeError("Cannot use both 'date_parser' and 'date_format'") # locals() should never be modified kwds = locals().copy() del kwds["filepath_or_buffer"] @@ -1248,17 +1237,6 @@ def read_table( FutureWarning, stacklevel=find_stack_level(), ) - if date_parser is not None: - warnings.warn( - "The argument 'date_parser' is deprecated and will " - "be removed in a future version. 
" - "Please use 'date_format' instead, or read your data in as 'object' dtype " - "and then call 'to_datetime'.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if date_parser is not None and date_format is not None: - raise TypeError("Cannot use both 'date_parser' and 'date_format'") # locals() should never be modified kwds = locals().copy() diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ec6484c5c2149..61301507c600d 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -269,8 +269,18 @@ def test_read_excel_parse_dates(self, ext): tm.assert_frame_equal(df, res) date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y") + with tm.assert_produces_warning( + FutureWarning, match="use 'date_format' instead" + ): + res = pd.read_excel( + pth, + parse_dates=["date_strings"], + date_parser=date_parser, + index_col=0, + ) + tm.assert_frame_equal(df, res) res = pd.read_excel( - pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0 + pth, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0 ) tm.assert_frame_equal(df, res) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index fa630990ecbcd..3a5a20ec1df51 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -775,6 +775,8 @@ def test_csv_custom_parser(all_parsers): ) expected = parser.read_csv(StringIO(data), parse_dates=True) tm.assert_frame_equal(result, expected) + result = parser.read_csv(StringIO(data), date_format="%Y%m%d") + tm.assert_frame_equal(result, expected) @xfail_pyarrow diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index c2939f7c12f10..47379aaab6feb 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -284,15 +284,16 @@ def test_fwf_regression(): 2009164210000 9.6034 9.0897 
8.3822 7.4905 6.0908 5.7904 5.4039 """ - result = read_fwf( - StringIO(data), - index_col=0, - header=None, - names=names, - widths=widths, - parse_dates=True, - date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"), - ) + with tm.assert_produces_warning(FutureWarning, match="use 'date_format' instead"): + result = read_fwf( + StringIO(data), + index_col=0, + header=None, + names=names, + widths=widths, + parse_dates=True, + date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"), + ) expected = DataFrame( [ [9.5403, 9.4105, 8.6571, 7.8372, 6.0612, 5.8843, 5.5192], @@ -313,6 +314,16 @@ def test_fwf_regression(): columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"], ) tm.assert_frame_equal(result, expected) + result = read_fwf( + StringIO(data), + index_col=0, + header=None, + names=names, + widths=widths, + parse_dates=True, + date_format="%Y%j%H%M%S", + ) + tm.assert_frame_equal(result, expected) def test_fwf_for_uint8(): From 4a233d7718fe8c2cfc1bb6d2d51aba5eac165912 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 14:29:10 +0000 Subject: [PATCH 13/16] minor fixup --- pandas/io/excel/_base.py | 1 + pandas/io/parsers/base_parser.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d65e5c13af4b8..d32f3ff3b6154 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -485,6 +485,7 @@ def read_excel( storage_options: StorageOptions = None, use_nullable_dtypes: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | dict[IntStrT, DataFrame]: + should_close = False if not isinstance(io, ExcelFile): should_close = True diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c9c54e7c7cace..89ad16d6c33d5 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1096,7 +1096,7 @@ def _make_date_converter( date_parser=None, dayfirst: bool = False, cache_dates: bool = True, - 
date_format=None, + date_format: str | None = None, ): if date_parser is not None: warnings.warn( From 5737c7072d1ffc7882cefb52c1e0f25d1c419fb3 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 8 Feb 2023 18:47:54 +0000 Subject: [PATCH 14/16] mention other readers in whatsnew, None -> no_default --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/io/excel/_base.py | 10 +++++----- pandas/io/parsers/base_parser.py | 12 ++++++------ pandas/io/parsers/readers.py | 9 ++++++--- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b23692caceb2a..5c52386de1717 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -782,7 +782,7 @@ Deprecations - :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`) - :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`) - :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_intterval_dtype` instead (:issue:`50042`) -- Deprecated argument ``date_parser`` in :func:`read_csv` and :func:`read_table` in favour of ``date_format`` (:issue:`50601`) +- Deprecated argument ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`) - Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0), tz=obj.tz).all()`` instead (:issue:`34479`) - Deprecated unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` (:issue:`50977`) - Deprecated calling ``float`` or ``int`` on a single element :class:`Series` to return a ``float`` or ``int`` respectively. 
Extract the element before calling ``float`` or ``int`` instead (:issue:`51101`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d32f3ff3b6154..ea4919752f51f 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -396,7 +396,7 @@ def read_excel( na_filter: bool = ..., verbose: bool = ..., parse_dates: list | dict | bool = ..., - date_parser: Callable | None = ..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., thousands: str | None = ..., decimal: str = ..., @@ -436,7 +436,7 @@ def read_excel( na_filter: bool = ..., verbose: bool = ..., parse_dates: list | dict | bool = ..., - date_parser: Callable | None = ..., + date_parser: Callable | lib.NoDefault = ..., date_format: str | None = ..., thousands: str | None = ..., decimal: str = ..., @@ -476,7 +476,7 @@ def read_excel( na_filter: bool = True, verbose: bool = False, parse_dates: list | dict | bool = False, - date_parser: Callable | None = None, + date_parser: Callable | lib.NoDefault = lib.no_default, date_format: str | None = None, thousands: str | None = None, decimal: str = ".", @@ -726,7 +726,7 @@ def parse( na_values=None, verbose: bool = False, parse_dates: list | dict | bool = False, - date_parser: Callable | None = None, + date_parser: Callable | lib.NoDefault = lib.no_default, date_format: str | None = None, thousands: str | None = None, decimal: str = ".", @@ -1555,7 +1555,7 @@ def parse( nrows: int | None = None, na_values=None, parse_dates: list | dict | bool = False, - date_parser: Callable | None = None, + date_parser: Callable | lib.NoDefault = lib.no_default, date_format: str | None = None, thousands: str | None = None, comment: str | None = None, diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 89ad16d6c33d5..8aec713aead21 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -115,7 +115,7 @@ def __init__(self, kwds) -> None: self.parse_dates = 
_validate_parse_dates_arg(kwds.pop("parse_dates", False)) self._parse_date_cols: Iterable = [] - self.date_parser = kwds.pop("date_parser", None) + self.date_parser = kwds.pop("date_parser", lib.no_default) self.date_format = kwds.pop("date_format", None) self.dayfirst = kwds.pop("dayfirst", False) self.keep_date_col = kwds.pop("keep_date_col", False) @@ -1093,12 +1093,12 @@ def _get_empty_meta( def _make_date_converter( - date_parser=None, + date_parser=lib.no_default, dayfirst: bool = False, cache_dates: bool = True, date_format: str | None = None, ): - if date_parser is not None: + if date_parser is not lib.no_default: warnings.warn( "The argument 'date_parser' is deprecated and will " "be removed in a future version. " @@ -1107,11 +1107,11 @@ def _make_date_converter( FutureWarning, stacklevel=find_stack_level(), ) - if date_parser is not None and date_format is not None: + if date_parser is not lib.no_default and date_format is not None: raise TypeError("Cannot use both 'date_parser' and 'date_format'") def converter(*date_cols): - if date_parser is None: + if date_parser is lib.no_default: strs = parsing.concat_date_cols(date_cols) return tools.to_datetime( @@ -1170,7 +1170,7 @@ def converter(*date_cols): "parse_dates": False, "keep_date_col": False, "dayfirst": False, - "date_parser": None, + "date_parser": lib.no_default, "date_format": None, "usecols": None, # 'iterator': False, diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index de5904b79c57d..47292bc84fa06 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -555,7 +555,10 @@ def _read( # if we pass a date_parser and parse_dates=False, we should not parse the # dates GH#44366 if kwds.get("parse_dates", None) is None: - if kwds.get("date_parser", None) is None and kwds.get("date_format") is None: + if ( + kwds.get("date_parser", lib.no_default) is lib.no_default + and kwds.get("date_format", None) is None + ): kwds["parse_dates"] = False else: 
kwds["parse_dates"] = True @@ -868,7 +871,7 @@ def read_csv( parse_dates: bool | Sequence[Hashable] | None = None, infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, - date_parser=None, + date_parser=lib.no_default, date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, @@ -1197,7 +1200,7 @@ def read_table( parse_dates: bool | Sequence[Hashable] = False, infer_datetime_format: bool | lib.NoDefault = lib.no_default, keep_date_col: bool = False, - date_parser=None, + date_parser=lib.no_default, date_format: str | None = None, dayfirst: bool = False, cache_dates: bool = True, From ca0db5c5e3fe2ee8cc4e2fcd5dfa821d5b5a632e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 10 Feb 2023 13:39:04 +0100 Subject: [PATCH 15/16] Update v2.0.0.rst --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5c52386de1717..62879acc8a6ad 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -292,7 +292,7 @@ Other enhancements - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`) - Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`) -- :func:`read_csv` and :func:`read_table` now accept ``date_format`` (:issue:`50601`) +- :func:`read_csv`, :func:`read_table`, :func:`read_fwf` and :func:`read_excel` now accept ``date_format`` (:issue:`50601`) - .. 
--------------------------------------------------------------------------- From e9ce938efe529e09c4a03e07671d695da54486db Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 15 Feb 2023 17:53:48 +0000 Subject: [PATCH 16/16] fixup merge conflict resolution --- doc/source/whatsnew/v2.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5cd5bfae2a88f..1b188b6065378 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -826,7 +826,6 @@ Deprecations - :meth:`Index.is_numeric` has been deprecated. Use :func:`pandas.api.types.is_any_real_numeric_dtype` instead (:issue:`50042`,:issue:`51152`) - :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`) - :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`) -- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_intterval_dtype` instead (:issue:`50042`) - :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`) - Deprecated argument ``date_parser`` in :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_excel` in favour of ``date_format`` (:issue:`50601`) - Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0), tz=obj.tz).all()`` instead (:issue:`34479`)