Skip to content

Commit 2630a0b

Browse files
nathalier authored and jreback committed
BUG: ignore errors for invalid dates in to_datetime() with errors=coerce (#25512) (#26561)
1 parent 6904c23 commit 2630a0b

File tree

3 files changed

+23
-3
lines changed

3 files changed

+23
-3
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ Datetimelike
427427
- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
428428
- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when ``errors`` is set to ``'coerce'`` (:issue:`26122`)
429429
- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`)
430+
- Bug in :func:`to_datetime` which raised an unhandled ``OverflowError`` when called with a mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`)
430431

431432
Timedelta
432433
^^^^^^^^^

pandas/core/tools/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -775,21 +775,21 @@ def calc_with_mask(carg, mask):
775775
# try intlike / strings that are ints
776776
try:
777777
return calc(arg.astype(np.int64))
778-
except ValueError:
778+
except (ValueError, OverflowError):
779779
pass
780780

781781
# a float with actual np.nan
782782
try:
783783
carg = arg.astype(np.float64)
784784
return calc_with_mask(carg, notna(carg))
785-
except ValueError:
785+
except (ValueError, OverflowError):
786786
pass
787787

788788
# string with NaN-like
789789
try:
790790
mask = ~algorithms.isin(arg, list(tslib.nat_strings))
791791
return calc_with_mask(arg, mask)
792-
except ValueError:
792+
except (ValueError, OverflowError):
793793
pass
794794

795795
return None

pandas/tests/indexes/datetimes/test_tools.py

+19
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,25 @@ def test_to_datetime_format_YYYYMMDD(self, cache):
9696
result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
9797
cache=cache)
9898
expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
99+
tm.assert_series_equal(result, expected)
100+
101+
@pytest.mark.parametrize("input_s, expected", [
102+
# NaN before strings with invalid date values
103+
[Series(['19801222', np.nan, '20010012', '10019999']),
104+
Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
105+
# NaN after strings with invalid date values
106+
[Series(['19801222', '20010012', '10019999', np.nan]),
107+
Series([Timestamp('19801222'), np.nan, np.nan, np.nan])],
108+
# NaN before integers with invalid date values
109+
[Series([20190813, np.nan, 20010012, 20019999]),
110+
Series([Timestamp('20190813'), np.nan, np.nan, np.nan])],
111+
# NaN after integers with invalid date values
112+
[Series([20190813, 20010012, np.nan, 20019999]),
113+
Series([Timestamp('20190813'), np.nan, np.nan, np.nan])]])
114+
def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
115+
# GH 25512
116+
# format='%Y%m%d', errors='coerce'
117+
result = pd.to_datetime(input_s, format='%Y%m%d', errors='coerce')
99118
assert_series_equal(result, expected)
100119

101120
@pytest.mark.parametrize('cache', [True, False])

0 commit comments

Comments
 (0)