Skip to content

ENH/COMPAT: update tests for dateutil 2.5.3 #12951

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ Other Enhancements

- ``pd.crosstab()`` has gained a ``normalize`` argument for normalizing frequency tables (:issue:`12569`). Examples in the updated docs :ref:`here <reshaping.crosstabulations>`.


.. _whatsnew_0181.sparse:

Sparse changes
Expand Down
4 changes: 2 additions & 2 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
seen_float = 1
elif util.is_datetime64_object(val):
if convert_datetime:
idatetimes[i] = convert_to_tsobject(val, None, None).value
idatetimes[i] = convert_to_tsobject(val, None, None, 0, 0).value
seen_datetime = 1
else:
seen_object = 1
Expand Down Expand Up @@ -712,7 +712,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
elif PyDateTime_Check(val) or util.is_datetime64_object(val):
if convert_datetime:
seen_datetime = 1
idatetimes[i] = convert_to_tsobject(val, None, None).value
idatetimes[i] = convert_to_tsobject(val, None, None, 0, 0).value
else:
seen_object = 1
break
Expand Down
121 changes: 71 additions & 50 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,80 +592,101 @@ def test_parsers_quarter_invalid(self):
self.assertRaises(ValueError, tools.parse_time_string, case)

def test_parsers_dayfirst_yearfirst(self):
raise nose.SkipTest("skipping until comprehensive fixes for dateutil, "
"xref #12944")
tm._skip_if_no_dateutil()

# OK
# 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
# 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the yearfirst be 0 here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what you mean, this is just printing out the 4 combinations of booleans (True/False).

# 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00

# OK
# 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
# 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
# 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00

# bug fix in 2.5.2
# 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
# 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
# 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00

# OK
# 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
# 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
# 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00

# OK
# 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
# 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
# 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00

# OK
# 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
# 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
# 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00

# revert of bug in 2.5.2
# 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
# 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12
# 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00

# OK
# 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
# 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
# 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00

# https://github.com/dateutil/dateutil/issues/217
# this issue was closed
import dateutil
is_compat_version = dateutil.__version__ >= LooseVersion('2.5.2')
if is_compat_version:
dayfirst_yearfirst1 = datetime.datetime(2010, 12, 11)
dayfirst_yearfirst2 = datetime.datetime(2020, 12, 21)
else:
dayfirst_yearfirst1 = datetime.datetime(2010, 11, 12)
dayfirst_yearfirst2 = datetime.datetime(2020, 12, 21)
is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3')

# str : dayfirst, yearfirst, expected
cases = {'10-11-12': [(False, False, False,
cases = {'10-11-12': [(False, False,
datetime.datetime(2012, 10, 11)),
(True, False, False,
(True, False,
datetime.datetime(2012, 11, 10)),
(False, True, False,
(False, True,
datetime.datetime(2010, 11, 12)),
(True, True, False, dayfirst_yearfirst1)],
'20/12/21': [(False, False, False,
(True, True,
datetime.datetime(2010, 12, 11))],
'20/12/21': [(False, False,
datetime.datetime(2021, 12, 20)),
(True, False, False,
(True, False,
datetime.datetime(2021, 12, 20)),
(False, True, False,
(False, True,
datetime.datetime(2020, 12, 21)),
(True, True, True, dayfirst_yearfirst2)]}
(True, True,
datetime.datetime(2020, 12, 21))]}

tm._skip_if_no_dateutil()
from dateutil.parser import parse
for date_str, values in compat.iteritems(cases):
for dayfirst, yearfirst, is_compat, expected in values:
for dayfirst, yearfirst, expected in values:

f = lambda x: tools.parse_time_string(x,
dayfirst=dayfirst,
yearfirst=yearfirst)

# we now have an invalid parse
if is_compat and is_compat_version:
self.assertRaises(tslib.DateParseError, f, date_str)

def f(date_str):
return to_datetime(date_str, dayfirst=dayfirst,
yearfirst=yearfirst)

self.assertRaises(ValueError, f, date_str)

def f(date_str):
return DatetimeIndex([date_str], dayfirst=dayfirst,
yearfirst=yearfirst)[0]
# odd comparisons across versions
# let's just skip
if dayfirst and yearfirst and is_lt_253:
continue

self.assertRaises(ValueError, f, date_str)
# compare with dateutil result
dateutil_result = parse(date_str, dayfirst=dayfirst,
yearfirst=yearfirst)
self.assertEqual(dateutil_result, expected)

continue
result1, _, _ = tools.parse_time_string(date_str,
dayfirst=dayfirst,
yearfirst=yearfirst)

result1, _, _ = f(date_str)
# we don't support dayfirst/yearfirst here:
if not dayfirst and not yearfirst:
result2 = Timestamp(date_str)
self.assertEqual(result2, expected)

result2 = to_datetime(date_str, dayfirst=dayfirst,
result3 = to_datetime(date_str, dayfirst=dayfirst,
yearfirst=yearfirst)

result3 = DatetimeIndex([date_str], dayfirst=dayfirst,
result4 = DatetimeIndex([date_str], dayfirst=dayfirst,
yearfirst=yearfirst)[0]

# Timestamp doesn't support dayfirst and yearfirst
self.assertEqual(result1, expected)
self.assertEqual(result2, expected)
self.assertEqual(result3, expected)

# compare with dateutil result
dateutil_result = parse(date_str, dayfirst=dayfirst,
yearfirst=yearfirst)
self.assertEqual(dateutil_result, expected)
self.assertEqual(result4, expected)

def test_parsers_timestring(self):
tm._skip_if_no_dateutil()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tslib.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from numpy cimport ndarray, int64_t

cdef convert_to_tsobject(object, object, object)
cdef convert_to_tsobject(object, object, object, bint, bint)
cdef convert_to_timedelta64(object, object, object)
cpdef object maybe_get_tz(object)
cdef bint _is_utc(object)
Expand Down
44 changes: 21 additions & 23 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ class Timestamp(_Timestamp):
numpy unit used for conversion, if ts_input is int or float
"""

# Do not add ``dayfirst`` and ``yearfirst`` to Timestamp based on the discussion
# https://github.com/pydata/pandas/pull/7599

@classmethod
def fromordinal(cls, ordinal, offset=None, tz=None):
""" passed an ordinal, translate and convert to a ts
Expand Down Expand Up @@ -295,7 +292,7 @@ class Timestamp(_Timestamp):
cdef _TSObject ts
cdef _Timestamp ts_base

ts = convert_to_tsobject(ts_input, tz, unit)
ts = convert_to_tsobject(ts_input, tz, unit, 0, 0)

if ts.value == NPY_NAT:
return NaT
Expand Down Expand Up @@ -544,7 +541,7 @@ class Timestamp(_Timestamp):

if self.nanosecond != 0 and warn:
print 'Warning: discarding nonzero nanoseconds'
ts = convert_to_tsobject(self, self.tzinfo, None)
ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0)

return datetime(ts.dts.year, ts.dts.month, ts.dts.day,
ts.dts.hour, ts.dts.min, ts.dts.sec,
Expand Down Expand Up @@ -997,7 +994,7 @@ cdef class _Timestamp(datetime):
cdef:
pandas_datetimestruct dts
_TSObject ts
ts = convert_to_tsobject(self, self.tzinfo, None)
ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0)
dts = ts.dts
return datetime(dts.year, dts.month, dts.day,
dts.hour, dts.min, dts.sec,
Expand Down Expand Up @@ -1237,7 +1234,8 @@ cpdef _get_utcoffset(tzinfo, obj):
return tzinfo.utcoffset(obj)

# helper to extract datetime and int64 from several different possibilities
cdef convert_to_tsobject(object ts, object tz, object unit):
cdef convert_to_tsobject(object ts, object tz, object unit,
bint dayfirst, bint yearfirst):
"""
Extract datetime and int64 from any of:
- np.int64 (with unit providing a possible modifier)
Expand All @@ -1259,7 +1257,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
obj = _TSObject()

if util.is_string_object(ts):
return convert_str_to_tsobject(ts, tz, unit)
return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)

if ts is None or ts is NaT:
obj.value = NPY_NAT
Expand Down Expand Up @@ -1329,7 +1327,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
elif PyDate_Check(ts):
# Keep the converter same as PyDateTime's
ts = datetime.combine(ts, datetime_time())
return convert_to_tsobject(ts, tz, None)
return convert_to_tsobject(ts, tz, None, 0, 0)
elif getattr(ts, '_typ', None) == 'period':
raise ValueError("Cannot convert Period to Timestamp unambiguously. Use to_timestamp")
else:
Expand Down Expand Up @@ -1390,7 +1388,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
except Exception:
raise ValueError

return convert_to_tsobject(ts, tz, unit)
return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)

def _test_parse_iso8601(object ts):
"""
Expand Down Expand Up @@ -1581,7 +1579,7 @@ def datetime_to_datetime64(ndarray[object] values):
else:
inferred_tz = _get_zone(val.tzinfo)

_ts = convert_to_tsobject(val, None, None)
_ts = convert_to_tsobject(val, None, None, 0, 0)
iresult[i] = _ts.value
_check_dts_bounds(&_ts.dts)
else:
Expand Down Expand Up @@ -1993,7 +1991,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
seen_datetime=1
if val.tzinfo is not None:
if utc_convert:
_ts = convert_to_tsobject(val, None, unit)
_ts = convert_to_tsobject(val, None, unit, 0, 0)
iresult[i] = _ts.value
try:
_check_dts_bounds(&_ts.dts)
Expand Down Expand Up @@ -2091,7 +2089,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
raise TypeError("invalid string coercion to datetime")

try:
_ts = convert_to_tsobject(py_dt, None, None)
_ts = convert_to_tsobject(py_dt, None, None, 0, 0)
iresult[i] = _ts.value
except ValueError:
if is_coerce:
Expand Down Expand Up @@ -2180,7 +2178,7 @@ def parse_str_array_to_datetime(ndarray values, dayfirst=False,
yearfirst=yearfirst, freq=freq)
except Exception:
raise ValueError
_ts = convert_to_tsobject(py_dt, None, None)
_ts = convert_to_tsobject(py_dt, None, None, 0, 0)
iresult[i] = _ts.value

return iresult
Expand Down Expand Up @@ -3466,7 +3464,7 @@ def pydt_to_i8(object pydt):
cdef:
_TSObject ts

ts = convert_to_tsobject(pydt, None, None)
ts = convert_to_tsobject(pydt, None, None, 0, 0)

return ts.value

Expand Down Expand Up @@ -4230,7 +4228,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
dom = dts.day
dow = ts_dayofweek(ts)

Expand All @@ -4254,7 +4252,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
dom = dts.day
Expand Down Expand Up @@ -4286,7 +4284,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
dom = dts.day
dow = ts_dayofweek(ts)

Expand All @@ -4310,7 +4308,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
dom = dts.day
Expand Down Expand Up @@ -4342,7 +4340,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
dom = dts.day
dow = ts_dayofweek(ts)

Expand All @@ -4366,7 +4364,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
isleap = is_leapyear(dts.year)
dom = dts.day
mo_off = _month_offset[isleap, dts.month - 1]
Expand All @@ -4382,7 +4380,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N
if dtindex[i] == NPY_NAT: out[i] = -1; continue

pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
ts = convert_to_tsobject(dtindex[i], None, None)
ts = convert_to_tsobject(dtindex[i], None, None, 0, 0)
isleap = is_leapyear(dts.year)
mo_off = _month_offset[isleap, dts.month - 1]
dom = dts.day
Expand Down Expand Up @@ -4429,7 +4427,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field):


cdef inline int m8_weekday(int64_t val):
ts = convert_to_tsobject(val, None, None)
ts = convert_to_tsobject(val, None, None, 0, 0)
return ts_dayofweek(ts)

cdef int64_t DAY_NS = 86400000000000LL
Expand Down