From 2a423345654a00371513859a1584835f8ea98216 Mon Sep 17 00:00:00 2001 From: Adam Greenhall Date: Mon, 21 Oct 2013 14:01:24 -0700 Subject: [PATCH] adjust timezone when adding date offsets for DST. fixes #5175 --- doc/source/v0.15.0.txt | 3 +- pandas/tseries/offsets.py | 45 +++++++++- pandas/tseries/tests/test_offsets.py | 128 +++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 5 deletions(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 1db189fcc74e3..006b44fe02379 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -596,12 +596,13 @@ Bug Fixes - Bug in area plot draws legend with incorrect ``alpha`` when ``stacked=True`` (:issue:`8027`) -- ``Period`` and ``PeriodIndex`` addition/subtraction with ``np.timedelta64`` results in incorrect internal representations (:issue:`7740`) - ``Holiday`` bug in Holiday with no offset or observance (:issue:`7987`) - Bug in ``DataFrame.to_latex`` formatting when columns or index is a ``MultiIndex`` (:issue:`7982`). +- Bug in ``DateOffset`` around Daylight Savings Time produces unexpected results (:issue:`5175`). + diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index cd37f4000e5a2..8bb5584fee7a7 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -152,6 +152,12 @@ def __add__(date): """ _cacheable = False _normalize_cache = True + _kwds_use_relativedelta = ( + 'years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond' + ) + _use_relativedelta = False # default for prior pickles normalize = False @@ -160,21 +166,52 @@ def __init__(self, n=1, normalize=False, **kwds): self.n = int(n) self.normalize = normalize self.kwds = kwds - if len(kwds) > 0: - self._offset = relativedelta(**kwds) + self._offset, self._use_relativedelta = self._determine_offset() + + def _determine_offset(self): + # timedelta is used for sub-daily plural offsets and all singular offsets + # relativedelta is used for plural offsets of daily length or more + # nanosecond(s) are handled by apply_wraps + kwds_no_nanos = dict( + (k, v) for k, v in self.kwds.items() + if k not in ('nanosecond', 'nanoseconds') + ) + use_relativedelta = False + + if len(kwds_no_nanos) > 0: + if any(k in self._kwds_use_relativedelta for k in kwds_no_nanos): + use_relativedelta = True + offset = relativedelta(**kwds_no_nanos) + else: + # sub-daily offset - use timedelta (tz-aware) + offset = timedelta(**kwds_no_nanos) else: - self._offset = timedelta(1) + offset = timedelta(1) + return offset, use_relativedelta @apply_wraps def apply(self, other): + if self._use_relativedelta: + other = as_datetime(other) + if len(self.kwds) > 0: + tzinfo = getattr(other, 'tzinfo', None) + if tzinfo is not None and self._use_relativedelta: + # perform calculation in UTC + other = other.replace(tzinfo=None) + if self.n > 0: for i in range(self.n): other = other + self._offset else: for i in range(-self.n): other = other - self._offset - return other + + if tzinfo is not None and self._use_relativedelta: + # bring tz back from UTC calculation + other = tslib._localize_pydatetime(other, tzinfo) + + return as_timestamp(other) else: return other + timedelta(self.n) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index f6f91760e8ad8..b3764b73b15ac 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -3104,6 +3104,134 @@ def test_str_for_named_is_name(self): self.assertEqual(str(offset), name) +def get_utc_offset_hours(ts): + # take a Timestamp and compute total hours of utc offset + o = ts.utcoffset() + return (o.days * 24 * 3600 + o.seconds) / 3600.0 + + +class TestDST(tm.TestCase): + """ + test DateOffset additions over Daylight Savings Time + """ + # one microsecond before the DST transition + ts_pre_fallback = "2013-11-03 01:59:59.999999" + ts_pre_springfwd = "2013-03-10 01:59:59.999999" + + # test both basic names and dateutil timezones + timezone_utc_offsets = { + 'US/Eastern': dict( + utc_offset_daylight=-4, + utc_offset_standard=-5, + ), + 'dateutil/US/Pacific': dict( + utc_offset_daylight=-7, + utc_offset_standard=-8, + ) + } + valid_date_offsets_singular = [ + 'weekday', 'day', 'hour', 'minute', 'second', 'microsecond' + ] + valid_date_offsets_plural = [ + 'weeks', 'days', + 'hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds' + ] + + def _test_all_offsets(self, n, **kwds): + valid_offsets = self.valid_date_offsets_plural if n > 1 \ + else self.valid_date_offsets_singular + + for name in valid_offsets: + self._test_offset(offset_name=name, offset_n=n, **kwds) + + def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): + offset = DateOffset(**{offset_name: offset_n}) + t = tstart + offset + if expected_utc_offset is not None: + self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset) + + if offset_name == 'weeks': + # dates should match + self.assertTrue( + t.date() == + timedelta(days=7 * offset.kwds['weeks']) + tstart.date() + ) + # expect the same day of week, hour of day, minute, second, ... + self.assertTrue( + t.dayofweek == tstart.dayofweek and + t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second + ) + elif offset_name == 'days': + # dates should match + self.assertTrue(timedelta(offset.kwds['days']) + tstart.date() == t.date()) + # expect the same hour of day, minute, second, ... + self.assertTrue( + t.hour == tstart.hour and + t.minute == tstart.minute and + t.second == tstart.second + ) + elif offset_name in self.valid_date_offsets_singular: + # expect the signular offset value to match between tstart and t + datepart_offset = getattr(t, offset_name if offset_name != 'weekday' else 'dayofweek') + self.assertTrue(datepart_offset == offset.kwds[offset_name]) + else: + # the offset should be the same as if it was done in UTC + self.assertTrue( + t == (tstart.tz_convert('UTC') + offset).tz_convert('US/Pacific') + ) + + def _make_timestamp(self, string, hrs_offset, tz): + offset_string = '{hrs:02d}00'.format(hrs=hrs_offset) if hrs_offset >= 0 else \ + '-{hrs:02d}00'.format(hrs=-1 * hrs_offset) + return Timestamp(string + offset_string).tz_convert(tz) + + def test_fallback_plural(self): + """test moving from daylight savings to standard time""" + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_daylight'] + hrs_post = utc_offsets['utc_offset_standard'] + self._test_all_offsets( + n=3, + tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), + expected_utc_offset=hrs_post + ) + + def test_springforward_plural(self): + """test moving from standard to daylight savings""" + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_standard'] + hrs_post = utc_offsets['utc_offset_daylight'] + self._test_all_offsets( + n=3, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=hrs_post + ) + + def test_fallback_singular(self): + # in the case of signular offsets, we dont neccesarily know which utc offset + # the new Timestamp will wind up in (the tz for 1 month may be different from 1 second) + # so we don't specify an expected_utc_offset + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_standard'] + self._test_all_offsets( + n=1, + tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), + expected_utc_offset=None + ) + + def test_springforward_singular(self): + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets['utc_offset_standard'] + self._test_all_offsets( + n=1, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=None + ) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)