From 83dc7de78dcabbf298aa99aeac8e7c51a3667521 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Aug 2017 21:06:59 -0700 Subject: [PATCH 1/5] Make some attributes of DateOffset read-only; add tests --- pandas/tests/tseries/test_offsets.py | 39 ++++++--- pandas/tseries/offsets.py | 118 +++++++++++++++++++-------- 2 files changed, 113 insertions(+), 44 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index e03b3e0a85e5e..bb5f27acea731 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -162,6 +162,27 @@ def test_apply_out_of_range(self): "cannot create out_of_range offset: {0} {1}".format( str(self).split('.')[-1], e)) + def test_immutable(self): + if self._offset is None: + return + elif not issubclass(self._offset, DateOffset): + raise TypeError(self._offset) + + offset = self._get_offset(self._offset, value=14) + with pytest.raises(TypeError): + offset.n = 3 + with pytest.raises(TypeError): + offset._offset = timedelta(4) + with pytest.raises(TypeError): + offset.normalize = True + + if hasattr(offset, '_inc'): + with pytest.raises(TypeError): + offset._inc = 'foo' + if hasattr(offset, 'delta'): + with pytest.raises(TypeError): + offset.delta = 6 * offset._inc + class TestCommon(Base): @@ -550,8 +571,7 @@ def setup_method(self, method): def test_different_normalize_equals(self): # equivalent in this special case offset = BDay() - offset2 = BDay() - offset2.normalize = True + offset2 = BDay(normalize=True) assert offset == offset2 def test_repr(self): @@ -745,8 +765,7 @@ def test_constructor_errors(self): def test_different_normalize_equals(self): # equivalent in this special case offset = self._offset() - offset2 = self._offset() - offset2.normalize = True + offset2 = self._offset(normalize=True) assert offset == offset2 def test_repr(self): @@ -1437,8 +1456,7 @@ def test_constructor_errors(self): def test_different_normalize_equals(self): # equivalent in this special case offset = self._offset() - offset2 = self._offset() - offset2.normalize = True + offset2 = self._offset(normalize=True) assert offset == offset2 def test_repr(self): @@ -1678,8 +1696,7 @@ def setup_method(self, method): def test_different_normalize_equals(self): # equivalent in this special case offset = CDay() - offset2 = CDay() - offset2.normalize = True + offset2 = CDay(normalize=True) assert offset == offset2 def test_repr(self): @@ -1959,8 +1976,7 @@ class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): def test_different_normalize_equals(self): # equivalent in this special case offset = CBMonthEnd() - offset2 = CBMonthEnd() - offset2.normalize = True + offset2 = CBMonthEnd(normalize=True) assert offset == offset2 def test_repr(self): @@ -2073,8 +2089,7 @@ class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): def test_different_normalize_equals(self): # equivalent in this special case offset = CBMonthBegin() - offset2 = CBMonthBegin() - offset2.normalize = True + offset2 = CBMonthBegin(normalize=True) assert offset == offset2 def test_repr(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2a120a0696836..88aac6634a8c8 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -10,7 +10,9 @@ # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday from dateutil.easter import easter + from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta +from pandas._libs.lib import cache_readonly import functools import operator @@ -132,6 +134,33 @@ class CacheableOffset(object): _cacheable = True +_kwds_use_relativedelta = ( + 'years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond' +) + + +def _determine_offset(kwds): + # timedelta is used for sub-daily plural offsets and all singular + # offsets relativedelta is used for plural offsets of daily length or + # more nanosecond(s) are handled by apply_wraps + kwds_no_nanos = dict( + (k, v) for k, v in kwds.items() + if k not in ('nanosecond', 'nanoseconds') + ) + + if len(kwds_no_nanos) > 0: + if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): + offset = relativedelta(**kwds_no_nanos) + else: + # sub-daily offset - use timedelta (tz-aware) + offset = timedelta(**kwds_no_nanos) + else: + offset = timedelta(1) + return offset + + class DateOffset(object): """ Standard kind of date increment used for a date range. @@ -177,43 +206,37 @@ def __add__(date): """ _cacheable = False _normalize_cache = True - _kwds_use_relativedelta = ( - 'years', 'months', 'weeks', 'days', - 'year', 'month', 'week', 'day', 'weekday', - 'hour', 'minute', 'second', 'microsecond' - ) - _use_relativedelta = False _adjust_dst = False + _typ = "dateoffset" - # default for prior pickles - normalize = False + def __setattr__(self, name, value): + # DateOffset needs to be effectively immutable in order for the + # caching in _cached_params to be correct. + frozen = ['n', '_offset', 'normalize', '_inc'] + if name in frozen and hasattr(self, name): + msg = '%s cannot change attribute %s' % (self.__class__, name) + raise TypeError(msg) + object.__setattr__(self, name, value) + + def __setstate__(self, state): + """Reconstruct an instance from a pickled state""" + self.__dict__ = state + if 'normalize' not in state and not hasattr(self, 'normalize'): + # default for prior pickles + self.normalize = False def __init__(self, n=1, normalize=False, **kwds): self.n = int(n) self.normalize = normalize self.kwds = kwds - self._offset, self._use_relativedelta = self._determine_offset() - - def _determine_offset(self): - # timedelta is used for sub-daily plural offsets and all singular - # offsets relativedelta is used for plural offsets of daily length or - # more nanosecond(s) are handled by apply_wraps - kwds_no_nanos = dict( - (k, v) for k, v in self.kwds.items() - if k not in ('nanosecond', 'nanoseconds') - ) - use_relativedelta = False - - if len(kwds_no_nanos) > 0: - if any(k in self._kwds_use_relativedelta for k in kwds_no_nanos): - use_relativedelta = True - offset = relativedelta(**kwds_no_nanos) - else: - # sub-daily offset - use timedelta (tz-aware) - offset = timedelta(**kwds_no_nanos) - else: - offset = timedelta(1) - return offset, use_relativedelta + self._offset = _determine_offset(kwds) + + @property + def _use_relativedelta(self): + # We need to check for _offset existence because it may not exist + # if we are in the process of unpickling. + return (hasattr(self, '_offset') and + isinstance(self._offset, relativedelta)) @apply_wraps def apply(self, other): @@ -308,7 +331,24 @@ def copy(self): def _should_cache(self): return self.isAnchored() and self._cacheable + @cache_readonly + def _cached_params(self): + assert len(self.kwds) == 0 + all_paras = dict(list(vars(self).items())) + # equiv: self.__dict__.copy() + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'normalize', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(self.__class__)] + attrs) + return params + def _params(self): + if len(self.kwds) == 0: + return self._cached_params + all_paras = dict(list(vars(self).items()) + list(self.kwds.items())) if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') @@ -578,6 +618,10 @@ def __setstate__(self, state): self.kwds['holidays'] = self.holidays = holidays self.kwds['weekmask'] = state['weekmask'] + if 'normalize' not in state: + # default for prior pickles + self.normalize = False + class BusinessDay(BusinessMixin, SingleConstructorOffset): """ @@ -591,6 +635,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) + self._offset = None @property def freqstr(self): @@ -709,6 +754,7 @@ def __init__(self, **kwds): self.offset = kwds.get('offset', timedelta(0)) self.start = kwds.get('start', '09:00') self.end = kwds.get('end', '17:00') + self._offset = None def _validate_time(self, t_input): from datetime import time as dt_time @@ -986,6 +1032,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['weekmask'] = self.weekmask = weekmask self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar + self._offset = None def get_calendar(self, weekmask, holidays, calendar): """Generate busdaycalendar""" @@ -1182,6 +1229,7 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): self.normalize = normalize self.kwds = kwds self.kwds['day_of_month'] = self.day_of_month + self._offset = None @classmethod def _from_name(cls, suffix=None): @@ -1580,6 +1628,7 @@ def __init__(self, n=1, normalize=False, **kwds): self._inc = timedelta(weeks=1) self.kwds = kwds + self._offset = None def isAnchored(self): return (self.n == 1 and self.weekday is not None) @@ -1702,6 +1751,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.week) self.kwds = kwds + self._offset = None @apply_wraps def apply(self, other): @@ -1792,6 +1842,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.weekday) self.kwds = kwds + self._offset = None @apply_wraps def apply(self, other): @@ -1857,8 +1908,8 @@ def __init__(self, n=1, normalize=False, **kwds): self.normalize = normalize self.startingMonth = kwds.get('startingMonth', self._default_startingMonth) - self.kwds = kwds + self._offset = None def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -1981,6 +2032,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.startingMonth = kwds.get('startingMonth', 3) self.kwds = kwds + self._offset = None def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -2306,6 +2358,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.variation = kwds["variation"] self.kwds = kwds + self._offset = None if self.n == 0: raise ValueError('N cannot be 0') @@ -2690,6 +2743,7 @@ def f(self, other): class Tick(SingleConstructorOffset): _inc = Timedelta(microseconds=1000) + _typ = "tick" __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) @@ -2741,7 +2795,7 @@ def __ne__(self, other): else: return DateOffset.__ne__(self, other) - @property + @cache_readonly def delta(self): return self.n * self._inc From a888af50e8b44855aebefea2af5625535be9e9d6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Aug 2017 19:30:44 -0700 Subject: [PATCH 2/5] Invalidate cache whenever an attribute is re-set --- pandas/tests/tseries/test_offsets.py | 22 +++++------- pandas/tseries/offsets.py | 52 +++++++++++----------------- 2 files changed, 28 insertions(+), 46 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index bb5f27acea731..926d0a33efc51 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -162,26 +162,20 @@ def test_apply_out_of_range(self): "cannot create out_of_range offset: {0} {1}".format( str(self).split('.')[-1], e)) - def test_immutable(self): + def test_cache_invalidation(self): if self._offset is None: return elif not issubclass(self._offset, DateOffset): raise TypeError(self._offset) offset = self._get_offset(self._offset, value=14) - with pytest.raises(TypeError): - offset.n = 3 - with pytest.raises(TypeError): - offset._offset = timedelta(4) - with pytest.raises(TypeError): - offset.normalize = True - - if hasattr(offset, '_inc'): - with pytest.raises(TypeError): - offset._inc = 'foo' - if hasattr(offset, 'delta'): - with pytest.raises(TypeError): - offset.delta = 6 * offset._inc + if len(offset.kwds) == 0: + cached_params = offset._params() + assert '_cached_params' in offset._cache + + offset.n = offset.n + 3 + # Setting offset.n should clear the cache + assert len(offset._cache) == 0 class TestCommon(Base): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 88aac6634a8c8..412008c31a0d0 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -134,13 +134,6 @@ class CacheableOffset(object): _cacheable = True -_kwds_use_relativedelta = ( - 'years', 'months', 'weeks', 'days', - 'year', 'month', 'week', 'day', 'weekday', - 'hour', 'minute', 'second', 'microsecond' -) - - def _determine_offset(kwds): # timedelta is used for sub-daily plural offsets and all singular # offsets relativedelta is used for plural offsets of daily length or @@ -150,6 +143,11 @@ def _determine_offset(kwds): if k not in ('nanosecond', 'nanoseconds') ) + _kwds_use_relativedelta = ( + 'years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond' + ) if len(kwds_no_nanos) > 0: if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): offset = relativedelta(**kwds_no_nanos) @@ -209,22 +207,25 @@ def __add__(date): _adjust_dst = False _typ = "dateoffset" + # default for prior pickles + normalize = False + def __setattr__(self, name, value): # DateOffset needs to be effectively immutable in order for the # caching in _cached_params to be correct. - frozen = ['n', '_offset', 'normalize', '_inc'] - if name in frozen and hasattr(self, name): - msg = '%s cannot change attribute %s' % (self.__class__, name) - raise TypeError(msg) + if hasattr(self, name): + # Resetting any existing attribute clears the cache_readonly + # cache. + try: + cache = self._cache + except AttributeError: + # if the cache_readonly cache has not been accessed yet, + # this attribute may not exist + pass + else: + cache.clear() object.__setattr__(self, name, value) - def __setstate__(self, state): - """Reconstruct an instance from a pickled state""" - self.__dict__ = state - if 'normalize' not in state and not hasattr(self, 'normalize'): - # default for prior pickles - self.normalize = False - def __init__(self, n=1, normalize=False, **kwds): self.n = int(n) self.normalize = normalize @@ -618,10 +619,6 @@ def __setstate__(self, state): self.kwds['holidays'] = self.holidays = holidays self.kwds['weekmask'] = state['weekmask'] - if 'normalize' not in state: - # default for prior pickles - self.normalize = False - class BusinessDay(BusinessMixin, SingleConstructorOffset): """ @@ -635,7 +632,6 @@ def __init__(self, n=1, normalize=False, **kwds): self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - self._offset = None @property def freqstr(self): @@ -754,7 +750,6 @@ def __init__(self, **kwds): self.offset = kwds.get('offset', timedelta(0)) self.start = kwds.get('start', '09:00') self.end = kwds.get('end', '17:00') - self._offset = None def _validate_time(self, t_input): from datetime import time as dt_time @@ -1032,7 +1027,6 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['weekmask'] = self.weekmask = weekmask self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar - self._offset = None def get_calendar(self, weekmask, holidays, calendar): """Generate busdaycalendar""" @@ -1229,7 +1223,6 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): self.normalize = normalize self.kwds = kwds self.kwds['day_of_month'] = self.day_of_month - self._offset = None @classmethod def _from_name(cls, suffix=None): @@ -1628,7 +1621,6 @@ def __init__(self, n=1, normalize=False, **kwds): self._inc = timedelta(weeks=1) self.kwds = kwds - self._offset = None def isAnchored(self): return (self.n == 1 and self.weekday is not None) @@ -1751,7 +1743,6 @@ def __init__(self, n=1, normalize=False, **kwds): self.week) self.kwds = kwds - self._offset = None @apply_wraps def apply(self, other): @@ -1842,7 +1833,6 @@ def __init__(self, n=1, normalize=False, **kwds): self.weekday) self.kwds = kwds - self._offset = None @apply_wraps def apply(self, other): @@ -1908,8 +1898,8 @@ def __init__(self, n=1, normalize=False, **kwds): self.normalize = normalize self.startingMonth = kwds.get('startingMonth', self._default_startingMonth) + self.kwds = kwds - self._offset = None def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -2032,7 +2022,6 @@ def __init__(self, n=1, normalize=False, **kwds): self.startingMonth = kwds.get('startingMonth', 3) self.kwds = kwds - self._offset = None def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -2358,7 +2347,6 @@ def __init__(self, n=1, normalize=False, **kwds): self.variation = kwds["variation"] self.kwds = kwds - self._offset = None if self.n == 0: raise ValueError('N cannot be 0') From 5e2fe8f152b6f7ebab3d70c8da22ad02109f216a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Aug 2017 19:35:28 -0700 Subject: [PATCH 3/5] Remove trailing whitespace --- pandas/tseries/offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 412008c31a0d0..56516c2cace8a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1898,7 +1898,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.normalize = normalize self.startingMonth = kwds.get('startingMonth', self._default_startingMonth) - + self.kwds = kwds def isAnchored(self): From e7babb69f97e0b788e15a42bd8fbedc10c51b378 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Aug 2017 19:36:42 -0700 Subject: [PATCH 4/5] flake8 fix excess whitespace --- pandas/tests/tseries/test_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 926d0a33efc51..841f59e50a9b6 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -173,7 +173,7 @@ def test_cache_invalidation(self): cached_params = offset._params() assert '_cached_params' in offset._cache - offset.n = offset.n + 3 + offset.n = offset.n + 3 # Setting offset.n should clear the cache assert len(offset._cache) == 0 From 7690d297da9c810d539a55b1a18007a37f9de925 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 26 Aug 2017 12:25:04 -0700 Subject: [PATCH 5/5] lint fixup --- pandas/tests/tseries/test_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 841f59e50a9b6..38ede04a5dd55 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -170,7 +170,7 @@ def test_cache_invalidation(self): offset = self._get_offset(self._offset, value=14) if len(offset.kwds) == 0: - cached_params = offset._params() + _ = offset._params() assert '_cached_params' in offset._cache offset.n = offset.n + 3