From 7ff41db03f124b23358efd50c6ff305544b4dc12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 2 Oct 2017 10:25:15 -0700 Subject: [PATCH 1/7] Move fields functions out of the way --- pandas/_libs/tslib.pyx | 504 +------------------------------ pandas/_libs/tslibs/fields.pyx | 534 +++++++++++++++++++++++++++++++++ setup.py | 2 + 3 files changed, 539 insertions(+), 501 deletions(-) create mode 100644 pandas/_libs/tslibs/fields.pyx diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 096ebe9a5627b..72977284668ce 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -90,6 +90,9 @@ from tslibs.timezones cimport ( get_timezone, get_utcoffset, maybe_get_tz, get_dst_info ) +from tslibs.fields import ( + get_date_name_field, get_start_end_field, get_date_field, + _isleapyear_arr, build_field_sarray) # noqa cdef inline object create_timestamp_from_ts( @@ -3830,48 +3833,6 @@ cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): # Accessors #---------------------------------------------------------------------- -def build_field_sarray(ndarray[int64_t] dtindex): - """ - Datetime as int64 representation to a structured array of fields - """ - cdef: - Py_ssize_t i, count = 0 - pandas_datetimestruct dts - ndarray[int32_t] years, months, days, hours, minutes, seconds, mus - - count = len(dtindex) - - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond - - out = np.empty(count, dtype=sa_dtype) - - years = out['Y'] - months = out['M'] - days = out['D'] - hours = out['h'] - minutes = out['m'] - seconds = out['s'] - mus = out['u'] - - for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - years[i] = dts.year - months[i] = dts.month - days[i] = dts.day - hours[i] = dts.hour - minutes[i] = dts.min - seconds[i] = dts.sec - mus[i] = dts.us - - return out - - def get_time_micros(ndarray[int64_t] dtindex): """ Datetime as int64 representation to a structured array of fields @@ -3891,453 +3852,6 @@ def get_time_micros(ndarray[int64_t] dtindex): return micros -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, extract the year, month, etc., - field and return an array of these values. - """ - cdef: - Py_ssize_t i, count = 0 - ndarray[int32_t] out - ndarray[int32_t, ndim=2] _month_offset - int isleap, isleap_prev - pandas_datetimestruct dts - int mo_off, doy, dow, woy - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.empty(count, dtype='i4') - - if field == 'Y': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.year - return out - - elif field == 'M': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - return out - - elif field == 'D': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.day - return out - - elif field == 'h': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.hour - return out - - elif field == 'm': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.min - return out - - elif field == 's': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.sec - return out - - elif field == 'us': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.us - return out - - elif field == 'ns': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.ps / 1000 - return out - elif field == 'doy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - out[i] = _month_offset[isleap, dts.month -1] + dts.day - return out - - elif field == 'dow': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dayofweek(dts.year, dts.month, dts.day) - return out - - elif field == 'woy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - isleap_prev = is_leapyear(dts.year - 1) - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - #estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy / 7 + 1 - - # verify - if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): - woy = 53 - else: - woy = 52 - elif woy == 53: - if 31 - dts.day + dow < 3: - woy = 1 - - out[i] = woy - return out - - elif field == 'q': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - out[i] = ((out[i] - 1) / 3) + 1 - return out - - elif field == 'dim': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = days_in_month(dts) - return out - elif field == 'is_leap_year': - return _isleapyear_arr(get_date_field(dtindex, 'Y')) - - raise ValueError("Field %s not supported" % field) - - -@cython.wraparound(False) -def get_start_end_field(ndarray[int64_t] dtindex, object field, - object freqstr=None, int month_kw=12): - """ - Given an int64-based datetime index return array of indicators - of whether timestamps are at the start/end of the month/quarter/year - (defined by frequency). - """ - cdef: - Py_ssize_t i - int count = 0 - bint is_business = 0 - int end_month = 12 - int start_month = 1 - ndarray[int8_t] out - ndarray[int32_t, ndim=2] _month_offset - bint isleap - pandas_datetimestruct dts - int mo_off, dom, doy, dow, ldom - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.zeros(count, dtype='int8') - - if freqstr: - if freqstr == 'C': - raise ValueError( - "Custom business days is not supported by %s" % field) - is_business = freqstr[0] == 'B' - - # YearBegin(), BYearBegin() use month = starting month of year. - # QuarterBegin(), BQuarterBegin() use startingMonth = starting - # month of year. Other offests use month, startingMonth as ending - # month of year. - - if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( - freqstr[1:3] in ['MS', 'QS', 'AS']): - end_month = 12 if month_kw == 1 else month_kw - 1 - start_month = month_kw - else: - end_month = month_kw - start_month = (end_month % 12) + 1 - else: - end_month = 12 - start_month = 1 - - if field == 'is_month_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_month_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = dayofweek(dts.year, dts.month, dts.day) - - if (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ldom == doy: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if ((dts.month - start_month) % 3 == 0) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if ((dts.month - start_month) % 3 == 0) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = dayofweek(dts.year, dts.month, dts.day) - - if ((dts.month - end_month) % 3 == 0) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ((dts.month - end_month) % 3 == 0) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if (dts.month == start_month) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if (dts.month == start_month) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - dom = dts.day - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dom - dow = dayofweek(dts.year, dts.month, dts.day) - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - raise ValueError("Field %s not supported" % field) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_name_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, return array of strings of date - name based on requested field (e.g. weekday_name) - """ - cdef: - Py_ssize_t i, count = 0 - ndarray[object] out - pandas_datetimestruct dts - int dow - - _dayname = np.array( - ['Monday', 'Tuesday', 'Wednesday', 'Thursday', - 'Friday', 'Saturday', 'Sunday'], - dtype=np.object_ ) - - count = len(dtindex) - out = np.empty(count, dtype=object) - - if field == 'weekday_name': - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = np.nan; continue - - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - dow = dayofweek(dts.year, dts.month, dts.day) - out[i] = _dayname[dow] - return out - - raise ValueError("Field %s not supported" % field) - - cdef int64_t DAY_NS = 86400000000000LL @@ -4471,18 +3985,6 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): #---------------------------------------------------------------------- -cpdef _isleapyear_arr(ndarray years): - cdef: - ndarray[int8_t] out - - # to make NaT result as False - out = np.zeros(len(years), dtype='int8') - out[np.logical_or(years % 400 == 0, - np.logical_and(years % 4 == 0, - years % 100 > 0))] = 1 - return out.view(bool) - - def monthrange(int64_t year, int64_t month): cdef: int64_t days diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx new file mode 100644 index 0000000000000..33ca026a8ef51 --- /dev/null +++ b/pandas/_libs/tslibs/fields.pyx @@ -0,0 +1,534 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 +""" +Functions for accessing attributes of Timestamp/datetime64/datetime-like +objects and arrays +""" + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t, int32_t, int8_t +np.import_array() + +cdef int64_t NPY_NAT = np.datetime64('NaT').astype(np.int64) + +from datetime cimport ( + pandas_datetimestruct, + pandas_datetime_to_datetimestruct, + PANDAS_FR_ns, + days_per_month_table, + is_leapyear, + dayofweek) + + +def build_field_sarray(ndarray[int64_t] dtindex): + """ + Datetime as int64 representation to a structured array of fields + """ + cdef: + Py_ssize_t i, count = 0 + pandas_datetimestruct dts + ndarray[int32_t] years, months, days, hours, minutes, seconds, mus + + count = len(dtindex) + + sa_dtype = [('Y', 'i4'), # year + ('M', 'i4'), # month + ('D', 'i4'), # day + ('h', 'i4'), # hour + ('m', 'i4'), # min + ('s', 'i4'), # second + ('u', 'i4')] # microsecond + + out = np.empty(count, dtype=sa_dtype) + + years = out['Y'] + months = out['M'] + days = out['D'] + hours = out['h'] + minutes = out['m'] + seconds = out['s'] + mus = out['u'] + + for i in range(count): + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + years[i] = dts.year + months[i] = dts.month + days[i] = dts.day + hours[i] = dts.hour + minutes[i] = dts.min + seconds[i] = dts.sec + mus[i] = dts.us + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_name_field(ndarray[int64_t] dtindex, object field): + """ + Given a int64-based datetime index, return array of strings of date + name based on requested field (e.g. weekday_name) + """ + cdef: + Py_ssize_t i, count = 0 + ndarray[object] out + pandas_datetimestruct dts + int dow + + _dayname = np.array( + ['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'], + dtype=np.object_) + + count = len(dtindex) + out = np.empty(count, dtype=object) + + if field == 'weekday_name': + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + dow = dayofweek(dts.year, dts.month, dts.day) + out[i] = _dayname[dow] + return out + + raise ValueError("Field %s not supported" % field) + + +@cython.wraparound(False) +def get_start_end_field(ndarray[int64_t] dtindex, object field, + object freqstr=None, int month_kw=12): + """ + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + """ + cdef: + Py_ssize_t i + int count = 0 + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + ndarray[int32_t, ndim=2] _month_offset + bint isleap + pandas_datetimestruct dts + int mo_off, dom, doy, dow, ldom + + _month_offset = np.array( + [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], + [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], + dtype=np.int32 ) + + count = len(dtindex) + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError( + "Custom business days is not supported by %s" % field) + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year. + # QuarterBegin(), BQuarterBegin() use startingMonth = starting + # month of year. Other offests use month, startingMonth as ending + # month of year. + + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( + freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + if field == 'is_month_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_month_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ldom == doy: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - start_month) % 3 == 0) and ( + (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if ((dts.month - start_month) % 3 == 0) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - end_month) % 3 == 0) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ((dts.month - end_month) % 3 == 0) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dts.month == start_month) and ( + (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if (dts.month == start_month) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + dom = dts.day + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dom + dow = dayofweek(dts.year, dts.month, dts.day) + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + raise ValueError("Field %s not supported" % field) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_field(ndarray[int64_t] dtindex, object field): + """ + Given a int64-based datetime index, extract the year, month, etc., + field and return an array of these values. + """ + cdef: + Py_ssize_t i, count = 0 + ndarray[int32_t] out + ndarray[int32_t, ndim=2] _month_offset + int isleap, isleap_prev + pandas_datetimestruct dts + int mo_off, doy, dow, woy + + _month_offset = np.array( + [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], + [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], + dtype=np.int32 ) + + count = len(dtindex) + out = np.empty(count, dtype='i4') + + if field == 'Y': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.year + return out + + elif field == 'M': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.month + return out + + elif field == 'D': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.day + return out + + elif field == 'h': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.hour + return out + + elif field == 'm': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.min + return out + + elif field == 's': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.sec + return out + + elif field == 'us': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.ps / 1000 + return out + elif field == 'doy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + out[i] = _month_offset[isleap, dts.month -1] + dts.day + return out + + elif field == 'dow': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dayofweek(dts.year, dts.month, dts.day) + return out + + elif field == 'woy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + isleap_prev = is_leapyear(dts.year - 1) + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + #estimate + woy = (doy - 1) - dow + 3 + if woy >= 0: + woy = woy / 7 + 1 + + # verify + if woy < 0: + if (woy > -2) or (woy == -2 and isleap_prev): + woy = 53 + else: + woy = 52 + elif woy == 53: + if 31 - dts.day + dow < 3: + woy = 1 + + out[i] = woy + return out + + elif field == 'q': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.month + out[i] = ((out[i] - 1) / 3) + 1 + return out + + elif field == 'dim': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = days_in_month(dts) + return out + elif field == 'is_leap_year': + return _isleapyear_arr(get_date_field(dtindex, 'Y')) + + raise ValueError("Field %s not supported" % field) + + +cdef inline int days_in_month(pandas_datetimestruct dts) nogil: + return days_per_month_table[is_leapyear(dts.year)][dts.month -1] + + +cpdef _isleapyear_arr(ndarray years): + cdef: + ndarray[int8_t] out + + # to make NaT result as False + out = np.zeros(len(years), dtype='int8') + out[np.logical_or(years % 400 == 0, + np.logical_and(years % 4 == 0, + years % 100 > 0))] = 1 + return out.view(bool) diff --git a/setup.py b/setup.py index 793aa089e708f..5ab943f87b7ac 100755 --- a/setup.py +++ b/setup.py @@ -343,6 +343,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/timezones.pyx', + 'pandas/_libs/tslibs/fields.pyx', 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/parsing.pyx', 'pandas/io/sas/sas.pyx'] @@ -486,6 +487,7 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, + '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields'}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), From dbf4c75ea5048d93f0b4d484a4ffffd09f881d6d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 3 Oct 2017 16:54:05 -0700 Subject: [PATCH 2/7] troubleshoot ci failure --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5ab943f87b7ac..84535ab230115 100755 --- a/setup.py +++ b/setup.py @@ -487,7 +487,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, - '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields'}, + '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', + 'depends': tseries_depends}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), From 7d8f42184429394e2d1a0ebe002a64e5569751df Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 3 Oct 2017 18:28:48 -0700 Subject: [PATCH 3/7] dummy commit (that fixes a flake8 complaint) to force CI --- pandas/_libs/tslibs/fields.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 33ca026a8ef51..2341ef7105f7a 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -127,7 +127,7 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, _month_offset = np.array( [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) + dtype=np.int32) count = len(dtindex) out = np.zeros(count, dtype='int8') From e5f77e4900e5e727544ae1319a53946ec72a4082 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 3 Oct 2017 19:22:14 -0700 Subject: [PATCH 4/7] add sources to setup to try to fix appveyor build --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 84535ab230115..b1685e8e5de62 100755 --- a/setup.py +++ b/setup.py @@ -488,7 +488,9 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', - 'depends': tseries_depends}, + 'depends': tseries_depends + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), From 9aa1eb3b6241be9c58334ae2574a53dcf286c972 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 3 Oct 2017 19:36:51 -0700 Subject: [PATCH 5/7] flake8 whitespace fixup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b1685e8e5de62..afeab6e8d7369 100755 --- a/setup.py +++ b/setup.py @@ -490,7 +490,7 @@ def pxd(name): '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', 'depends': tseries_depends 'sources': ['pandas/_libs/src/datetime/np_datetime.c', - 'pandas/_libs/src/datetime/np_datetime_strings.c']}, + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), From 7b12b5a958d91b2027b085365fc4cc3a8eb540b2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 3 Oct 2017 22:14:10 -0700 Subject: [PATCH 6/7] fixup typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index afeab6e8d7369..f58f8cbb5519d 100755 --- a/setup.py +++ b/setup.py @@ -488,7 +488,7 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', - 'depends': tseries_depends + 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.period': {'pyxfile': '_libs/period', From 0e9e1e5c8ee6950df80e542d8d6bacf2487d8af2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 4 Oct 2017 07:36:17 -0700 Subject: [PATCH 7/7] reviewer comments --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/fields.pyx | 11 +++++++---- pandas/core/indexes/period.py | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 72977284668ce..9fd1f367d013d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -92,7 +92,7 @@ from tslibs.timezones cimport ( ) from tslibs.fields import ( get_date_name_field, get_start_end_field, get_date_field, - _isleapyear_arr, build_field_sarray) # noqa + build_field_sarray) cdef inline object create_timestamp_from_ts( diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 2341ef7105f7a..3ea414b2d4a70 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -16,7 +16,6 @@ cimport numpy as np from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -cdef int64_t NPY_NAT = np.datetime64('NaT').astype(np.int64) from datetime cimport ( pandas_datetimestruct, @@ -26,6 +25,10 @@ from datetime cimport ( is_leapyear, dayofweek) +cimport util + +cdef int64_t NPY_NAT = util.get_nat() + def build_field_sarray(ndarray[int64_t] dtindex): """ @@ -513,7 +516,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): out[i] = days_in_month(dts) return out elif field == 'is_leap_year': - return _isleapyear_arr(get_date_field(dtindex, 'Y')) + return isleapyear_arr(get_date_field(dtindex, 'Y')) raise ValueError("Field %s not supported" % field) @@ -522,11 +525,11 @@ cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] -cpdef _isleapyear_arr(ndarray years): +cpdef isleapyear_arr(ndarray years): + """vectorized version of isleapyear; NaT evaluates as False""" cdef: ndarray[int8_t] out - # to make NaT result as False out = np.zeros(len(years), dtype='int8') out[np.logical_or(years % 400 == 0, np.logical_and(years % 4 == 0, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fb47d1db48610..e6fc47845012a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -35,6 +35,7 @@ from pandas._libs.period import (Period, IncompatibleFrequency, get_period_field_arr, _validate_end_alias, _quarter_to_myear) +from pandas._libs.tslibs.fields import isleapyear_arr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, _ensure_index @@ -589,7 +590,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(np.asarray(self.year)) + return isleapyear_arr(np.asarray(self.year)) @property def start_time(self):