-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement BaseOffset in tslibs.offsets #18016
Changes from 5 commits
a728d99
8eb131e
4a4ef1c
6c7db0a
20f2d8b
8da5a84
58ffc7c
7945386
83d26fa
b281093
db38ef2
673aa29
74e1eed
93f9a75
61bf134
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,9 @@ | |
cimport cython | ||
|
||
import time | ||
from cpython.datetime cimport time as dt_time | ||
from cpython.datetime cimport timedelta, time as dt_time | ||
|
||
from dateutil.relativedelta import relativedelta | ||
|
||
import numpy as np | ||
cimport numpy as np | ||
|
@@ -15,6 +17,7 @@ from util cimport is_string_object | |
|
||
|
||
from pandas._libs.tslib import pydt_to_i8, tz_convert_single | ||
from frequencies cimport get_freq_code | ||
|
||
# --------------------------------------------------------------------- | ||
# Constants | ||
|
@@ -105,17 +108,38 @@ def as_datetime(obj): | |
return obj | ||
|
||
|
||
def _is_normalized(dt): | ||
cpdef bint _is_normalized(dt): | ||
if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or | ||
dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0): | ||
return False | ||
return True | ||
|
||
|
||
def apply_index_wraps(func): | ||
# Note: normally we would use `@functools.wraps(func)`, but this does | ||
# not play nicely wtih cython class methods | ||
def wrapper(self, other): | ||
result = func(self, other) | ||
if self.normalize: | ||
result = result.to_period('D').to_timestamp() | ||
return result | ||
|
||
# do @functools.wraps(func) manually since it doesn't work on cdef funcs | ||
wrapper.__name__ = func.__name__ | ||
wrapper.__doc__ = func.__doc__ | ||
try: | ||
wrapper.__module__ = func.__module__ | ||
except AttributeError: | ||
# AttributeError: 'method_descriptor' object has no | ||
# attribute '__module__' | ||
pass | ||
return wrapper | ||
|
||
|
||
# --------------------------------------------------------------------- | ||
# Business Helpers | ||
|
||
def _get_firstbday(wkday): | ||
cpdef int _get_firstbday(int wkday): | ||
""" | ||
wkday is the result of monthrange(year, month) | ||
|
||
|
@@ -194,6 +218,48 @@ def _validate_business_time(t_input): | |
else: | ||
raise ValueError("time data must be string or datetime.time") | ||
|
||
|
||
# --------------------------------------------------------------------- | ||
# Constructor Helpers | ||
|
||
_rd_kwds = set([ | ||
'years', 'months', 'weeks', 'days', | ||
'year', 'month', 'week', 'day', 'weekday', | ||
'hour', 'minute', 'second', 'microsecond', | ||
'nanosecond', 'nanoseconds', | ||
'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds' | ||
]) | ||
|
||
|
||
def _determine_offset(kwds): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment this is a method of |
||
# timedelta is used for sub-daily plural offsets and all singular | ||
# offsets relativedelta is used for plural offsets of daily length or | ||
# more nanosecond(s) are handled by apply_wraps | ||
kwds_no_nanos = dict( | ||
(k, v) for k, v in kwds.items() | ||
if k not in ('nanosecond', 'nanoseconds') | ||
) | ||
# TODO: Are nanosecond and nanoseconds allowed somewhere? | ||
|
||
_kwds_use_relativedelta = ( | ||
'years', 'months', 'weeks', 'days', | ||
'year', 'month', 'week', 'day', 'weekday', | ||
'hour', 'minute', 'second', 'microsecond' | ||
) | ||
|
||
use_relativedelta = False | ||
if len(kwds_no_nanos) > 0: | ||
if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): | ||
offset = relativedelta(**kwds_no_nanos) | ||
use_relativedelta = True | ||
else: | ||
# sub-daily offset - use timedelta (tz-aware) | ||
offset = timedelta(**kwds_no_nanos) | ||
else: | ||
offset = timedelta(1) | ||
return offset, use_relativedelta | ||
|
||
|
||
# --------------------------------------------------------------------- | ||
# Mixins & Singletons | ||
|
||
|
@@ -206,3 +272,109 @@ class ApplyTypeError(TypeError): | |
# TODO: unused. remove? | ||
class CacheableOffset(object): | ||
_cacheable = True | ||
|
||
|
||
class BeginMixin(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BeginMixin and EndMixin are new, each only have the one method. At the moment these methods are in DateOffset, but they are only used by a small handful of FooBegin and BarEnd subclasses. |
||
# helper for vectorized offsets | ||
|
||
def _beg_apply_index(self, i, freq): | ||
"""Offsets index to beginning of Period frequency""" | ||
|
||
off = i.to_perioddelta('D') | ||
|
||
base, mult = get_freq_code(freq) | ||
base_period = i.to_period(base) | ||
if self.n <= 0: | ||
# when subtracting, dates on start roll to prior | ||
roll = np.where(base_period.to_timestamp() == i - off, | ||
self.n, self.n + 1) | ||
else: | ||
roll = self.n | ||
|
||
base = (base_period + roll).to_timestamp() | ||
return base + off | ||
|
||
|
||
class EndMixin(object): | ||
# helper for vectorized offsets | ||
|
||
def _end_apply_index(self, i, freq): | ||
"""Offsets index to end of Period frequency""" | ||
|
||
off = i.to_perioddelta('D') | ||
|
||
base, mult = get_freq_code(freq) | ||
base_period = i.to_period(base) | ||
if self.n > 0: | ||
# when adding, dates on end roll to next | ||
roll = np.where(base_period.to_timestamp(how='end') == i - off, | ||
self.n, self.n - 1) | ||
else: | ||
roll = self.n | ||
|
||
base = (base_period + roll).to_timestamp(how='end') | ||
return base + off | ||
|
||
|
||
# --------------------------------------------------------------------- | ||
# Base Classes | ||
|
||
class _BaseOffset(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you creating a base class here? what is the purpose? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IOW why not simply have 1 Base class (and not a _BaseOffset and a BaseOffset) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comments about remaining cython/pickle issues. You're absolutely right that in its current form having two separate classes accomplishes nothing. The idea is that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok that is fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would probably leave this as a class for the moment. I am not convinced this actually needs to be a full c-extension class (e.g. its not like we are inheriting from a python c-class here). I don't see the benefit and it has added complexity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main reason is to achieve immutability. That's the big roadblock between us and making There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
""" | ||
Base class for DateOffset methods that are not overriden by subclasses | ||
and will (after pickle errors are resolved) go into a cdef class. | ||
""" | ||
_typ = "dateoffset" | ||
_normalize_cache = True | ||
_cacheable = False | ||
|
||
def __call__(self, other): | ||
return self.apply(other) | ||
|
||
def __mul__(self, someInt): | ||
return self.__class__(n=someInt * self.n, normalize=self.normalize, | ||
**self.kwds) | ||
|
||
def __neg__(self): | ||
# Note: we are defering directly to __mul__ instead of __rmul__, as | ||
# that allows us to use methods that can go in a `cdef class` | ||
return self * -1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the status quo |
||
|
||
def copy(self): | ||
# Note: we are defering directly to __mul__ instead of __rmul__, as | ||
# that allows us to use methods that can go in a `cdef class` | ||
return self * 1 | ||
|
||
# TODO: this is never true. fix it or get rid of it | ||
def _should_cache(self): | ||
return self.isAnchored() and self._cacheable | ||
|
||
def __repr__(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. side note, the repr is currently used for hashing, but instead should simply define There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
className = getattr(self, '_outputName', type(self).__name__) | ||
|
||
if abs(self.n) != 1: | ||
plural = 's' | ||
else: | ||
plural = '' | ||
|
||
n_str = "" | ||
if self.n != 1: | ||
n_str = "%s * " % self.n | ||
|
||
out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' | ||
return out | ||
|
||
|
||
class BaseOffset(_BaseOffset): | ||
# Here we add __rfoo__ methods that don't play well with cdef classes | ||
def __rmul__(self, someInt): | ||
return self.__mul__(someInt) | ||
|
||
def __radd__(self, other): | ||
return self.__add__(other) | ||
|
||
def __rsub__(self, other): | ||
if getattr(other, '_typ', None) in ['datetimeindex', 'series']: | ||
# i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries)) | ||
return other - self | ||
return -self + other |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,9 @@ | |
from pandas.tseries.holiday import USFederalHolidayCalendar | ||
|
||
|
||
data_dir = tm.get_data_path() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moving this call to up here ensures that we get the same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. huh? we use this pattern everywhere, why are you changing this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because when I try to run these tests interactively and copy/paste the contents of a test function, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you mean 'interactively'? you should simply be running
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When a test fails and I want to figure out why, I run the contents of the test manually in the REPL. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Happy to revert this change; not that big a deal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes pls revert. standard way to run tests is
lots of options, including pls revert this is non-standard |
||
|
||
|
||
def test_monthrange(): | ||
import calendar | ||
for y in range(2000, 2013): | ||
|
@@ -480,7 +483,7 @@ def test_pickle_v0_15_2(self): | |
'Day': Day(1), | ||
'YearBegin': YearBegin(1), | ||
'Week': Week(1)} | ||
pickle_path = os.path.join(tm.get_data_path(), | ||
pickle_path = os.path.join(data_dir, | ||
'dateoffset_0_15_2.pickle') | ||
# This code was executed once on v0.15.2 to generate the pickle: | ||
# with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) | ||
|
@@ -1884,7 +1887,7 @@ def _check_roundtrip(obj): | |
def test_pickle_compat_0_14_1(self): | ||
hdays = [datetime(2013, 1, 1) for ele in range(4)] | ||
|
||
pth = tm.get_data_path() | ||
pth = data_dir | ||
|
||
cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) | ||
cday = CDay(holidays=hdays) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update setup.py for this