diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index a03a5d7164e63..96a36e4d3b921 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -170,7 +170,6 @@ Other Enhancements - ``pd.crosstab()`` has gained a ``normalize`` argument for normalizing frequency tables (:issue:`12569`). Examples in the updated docs :ref:`here <reshaping.crosstabulations>`. - .. _whatsnew_0181.sparse: Sparse changes diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 35c055e5e48cd..843031fafa1a9 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -683,7 +683,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen_float = 1 elif util.is_datetime64_object(val): if convert_datetime: - idatetimes[i] = convert_to_tsobject(val, None, None).value + idatetimes[i] = convert_to_tsobject(val, None, None, 0, 0).value seen_datetime = 1 else: seen_object = 1 @@ -712,7 +712,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif PyDateTime_Check(val) or util.is_datetime64_object(val): if convert_datetime: seen_datetime = 1 - idatetimes[i] = convert_to_tsobject(val, None, None).value + idatetimes[i] = convert_to_tsobject(val, None, None, 0, 0).value else: seen_object = 1 break diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index ada4d92086408..e4f91b25777a3 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -592,80 +592,101 @@ def test_parsers_quarter_invalid(self): self.assertRaises(ValueError, tools.parse_time_string, case) def test_parsers_dayfirst_yearfirst(self): - raise nose.SkipTest("skipping until comprehensive fixes for dateutil, " - "xref #12944") + tm._skip_if_no_dateutil() + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + + # OK + #
2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + + # bug fix in 2.5.2 + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + + # revert of bug in 2.5.2 + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 - # https://github.com/dateutil/dateutil/issues/217 - # this issue was closed import dateutil - is_compat_version = dateutil.__version__ >= LooseVersion('2.5.2') - if is_compat_version: - dayfirst_yearfirst1 = datetime.datetime(2010, 12, 11) - dayfirst_yearfirst2 = datetime.datetime(2020, 12, 21) - else: - dayfirst_yearfirst1 = datetime.datetime(2010, 11, 12) - dayfirst_yearfirst2 = datetime.datetime(2020, 12, 21) + is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3') # str : 
dayfirst, yearfirst, expected - cases = {'10-11-12': [(False, False, False, + cases = {'10-11-12': [(False, False, datetime.datetime(2012, 10, 11)), - (True, False, False, + (True, False, datetime.datetime(2012, 11, 10)), - (False, True, False, + (False, True, datetime.datetime(2010, 11, 12)), - (True, True, False, dayfirst_yearfirst1)], - '20/12/21': [(False, False, False, + (True, True, + datetime.datetime(2010, 12, 11))], + '20/12/21': [(False, False, datetime.datetime(2021, 12, 20)), - (True, False, False, + (True, False, datetime.datetime(2021, 12, 20)), - (False, True, False, + (False, True, datetime.datetime(2020, 12, 21)), - (True, True, True, dayfirst_yearfirst2)]} + (True, True, + datetime.datetime(2020, 12, 21))]} - tm._skip_if_no_dateutil() from dateutil.parser import parse for date_str, values in compat.iteritems(cases): - for dayfirst, yearfirst, is_compat, expected in values: + for dayfirst, yearfirst, expected in values: - f = lambda x: tools.parse_time_string(x, - dayfirst=dayfirst, - yearfirst=yearfirst) - - # we now have an invalid parse - if is_compat and is_compat_version: - self.assertRaises(tslib.DateParseError, f, date_str) - - def f(date_str): - return to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - - self.assertRaises(ValueError, f, date_str) - - def f(date_str): - return DatetimeIndex([date_str], dayfirst=dayfirst, - yearfirst=yearfirst)[0] + # odd comparisons across version + # let's just skip + if dayfirst and yearfirst and is_lt_253: + continue - self.assertRaises(ValueError, f, date_str) + # compare with dateutil result + dateutil_result = parse(date_str, dayfirst=dayfirst, + yearfirst=yearfirst) + self.assertEqual(dateutil_result, expected) - continue + result1, _, _ = tools.parse_time_string(date_str, + dayfirst=dayfirst, + yearfirst=yearfirst) - result1, _, _ = f(date_str) + # we don't support dayfirst/yearfirst here: + if not dayfirst and not yearfirst: + result2 = Timestamp(date_str) + self.assertEqual(result2, 
expected) - result2 = to_datetime(date_str, dayfirst=dayfirst, + result3 = to_datetime(date_str, dayfirst=dayfirst, yearfirst=yearfirst) - result3 = DatetimeIndex([date_str], dayfirst=dayfirst, + result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] - # Timestamp doesn't support dayfirst and yearfirst self.assertEqual(result1, expected) - self.assertEqual(result2, expected) self.assertEqual(result3, expected) - - # compare with dateutil result - dateutil_result = parse(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) - self.assertEqual(dateutil_result, expected) + self.assertEqual(result4, expected) def test_parsers_timestring(self): tm._skip_if_no_dateutil() diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd index 5e0c88604206c..d6c5810e1d713 100644 --- a/pandas/tslib.pxd +++ b/pandas/tslib.pxd @@ -1,6 +1,6 @@ from numpy cimport ndarray, int64_t -cdef convert_to_tsobject(object, object, object) +cdef convert_to_tsobject(object, object, object, bint, bint) cdef convert_to_timedelta64(object, object, object) cpdef object maybe_get_tz(object) cdef bint _is_utc(object) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index bd6c72e1a7a1c..a325c140d36d9 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -237,9 +237,6 @@ class Timestamp(_Timestamp): numpy unit used for conversion, if ts_input is int or float """ - # Do not add ``dayfirst`` and ``yearfist`` to Timestamp based on the discussion - # https://github.com/pydata/pandas/pull/7599 - @classmethod def fromordinal(cls, ordinal, offset=None, tz=None): """ passed an ordinal, translate and convert to a ts @@ -295,7 +292,7 @@ class Timestamp(_Timestamp): cdef _TSObject ts cdef _Timestamp ts_base - ts = convert_to_tsobject(ts_input, tz, unit) + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0) if ts.value == NPY_NAT: return NaT @@ -544,7 +541,7 @@ class Timestamp(_Timestamp): if self.nanosecond != 0 and warn: print 'Warning: discarding nonzero nanoseconds' - ts = convert_to_tsobject(self, 
self.tzinfo, None) + ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) return datetime(ts.dts.year, ts.dts.month, ts.dts.day, ts.dts.hour, ts.dts.min, ts.dts.sec, @@ -997,7 +994,7 @@ cdef class _Timestamp(datetime): cdef: pandas_datetimestruct dts _TSObject ts - ts = convert_to_tsobject(self, self.tzinfo, None) + ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) dts = ts.dts return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, @@ -1237,7 +1234,8 @@ cpdef _get_utcoffset(tzinfo, obj): return tzinfo.utcoffset(obj) # helper to extract datetime and int64 from several different possibilities -cdef convert_to_tsobject(object ts, object tz, object unit): +cdef convert_to_tsobject(object ts, object tz, object unit, + bint dayfirst, bint yearfirst): """ Extract datetime and int64 from any of: - np.int64 (with unit providing a possible modifier) @@ -1259,7 +1257,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit): obj = _TSObject() if util.is_string_object(ts): - return convert_str_to_tsobject(ts, tz, unit) + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) if ts is None or ts is NaT: obj.value = NPY_NAT @@ -1329,7 +1327,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit): elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz, None) + return convert_to_tsobject(ts, tz, None, 0, 0) elif getattr(ts, '_typ', None) == 'period': raise ValueError("Cannot convert Period to Timestamp unambiguously. 
Use to_timestamp") else: @@ -1390,7 +1388,7 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, except Exception: raise ValueError - return convert_to_tsobject(ts, tz, unit) + return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) def _test_parse_iso8601(object ts): """ @@ -1581,7 +1579,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = _get_zone(val.tzinfo) - _ts = convert_to_tsobject(val, None, None) + _ts = convert_to_tsobject(val, None, None, 0, 0) iresult[i] = _ts.value _check_dts_bounds(&_ts.dts) else: @@ -1993,7 +1991,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime=1 if val.tzinfo is not None: if utc_convert: - _ts = convert_to_tsobject(val, None, unit) + _ts = convert_to_tsobject(val, None, unit, 0, 0) iresult[i] = _ts.value try: _check_dts_bounds(&_ts.dts) @@ -2091,7 +2089,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to datetime") try: - _ts = convert_to_tsobject(py_dt, None, None) + _ts = convert_to_tsobject(py_dt, None, None, 0, 0) iresult[i] = _ts.value except ValueError: if is_coerce: @@ -2180,7 +2178,7 @@ def parse_str_array_to_datetime(ndarray values, dayfirst=False, yearfirst=yearfirst, freq=freq) except Exception: raise ValueError - _ts = convert_to_tsobject(py_dt, None, None) + _ts = convert_to_tsobject(py_dt, None, None, 0, 0) iresult[i] = _ts.value return iresult @@ -3466,7 +3464,7 @@ def pydt_to_i8(object pydt): cdef: _TSObject ts - ts = convert_to_tsobject(pydt, None, None) + ts = convert_to_tsobject(pydt, None, None, 0, 0) return ts.value @@ -4230,7 +4228,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day dow = ts_dayofweek(ts) @@ 
-4254,7 +4252,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -4286,7 +4284,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day dow = ts_dayofweek(ts) @@ -4310,7 +4308,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -4342,7 +4340,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day dow = ts_dayofweek(ts) @@ -4366,7 +4364,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) dom = dts.day mo_off = _month_offset[isleap, dts.month - 1] @@ -4382,7 +4380,7 @@ def 
get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=N if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None) + ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -4429,7 +4427,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None) + ts = convert_to_tsobject(val, None, None, 0, 0) return ts_dayofweek(ts) cdef int64_t DAY_NS = 86400000000000LL