From 59140c36cecaa9e6e6ec76048f4ff103c1d132c3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Jul 2018 14:23:50 -0700 Subject: [PATCH 1/2] use memoryviews instead of ndarrays --- pandas/_libs/hashing.pyx | 8 +++--- pandas/_libs/tslib.pyx | 7 +++--- pandas/_libs/tslibs/conversion.pyx | 22 +++++++++++------ pandas/_libs/tslibs/parsing.pyx | 37 ++++++++++++++-------------- pandas/_libs/tslibs/period.pyx | 39 +++++++++++++++--------------- pandas/_libs/tslibs/resolution.pyx | 7 +++--- pandas/_libs/tslibs/strptime.pyx | 10 ++++---- pandas/_libs/tslibs/timedeltas.pxd | 4 +-- pandas/_libs/tslibs/timedeltas.pyx | 14 +++++------ pandas/_libs/tslibs/timestamps.pyx | 6 ++--- pandas/_libs/tslibs/timezones.pyx | 7 +++--- 11 files changed, 84 insertions(+), 77 deletions(-) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index ff92ee306288a..16cfde620d269 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -5,7 +5,7 @@ import cython import numpy as np -from numpy cimport ndarray, uint8_t, uint32_t, uint64_t +from numpy cimport uint8_t, uint32_t, uint64_t from util cimport _checknull from cpython cimport (PyBytes_Check, @@ -17,7 +17,7 @@ DEF dROUNDS = 4 @cython.boundscheck(False) -def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): +def hash_object_array(object[:] arr, object key, object encoding='utf8'): """ Parameters ---------- @@ -37,7 +37,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): """ cdef: Py_ssize_t i, l, n - ndarray[uint64_t] result + uint64_t[:] result bytes data, k uint8_t *kb uint64_t *lens @@ -89,7 +89,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): free(vecs) free(lens) - return result + return result.base # .base to retrieve underlying np.ndarray cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 76e3d6e92d31e..18947166a9d84 100644 --- 
a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -77,8 +77,7 @@ cdef inline object create_time_from_ts( return time(dts.hour, dts.min, dts.sec, dts.us) -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, - box="datetime"): +def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -102,7 +101,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, cdef: Py_ssize_t i, n = len(arr) - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t pos npy_datetimestruct dts object dt int64_t value, delta diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 4335e7baeafe9..805f2eb276372 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -525,7 +525,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): Sets obj.tzinfo inplace, alters obj.dts inplace. """ cdef: - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas int64_t local_val Py_ssize_t pos @@ -632,14 +633,15 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz, Py_ssize_t n = len(values) Py_ssize_t i, j, pos ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] tt, trans, deltas + ndarray[int64_t] tt, trans + int64_t[:] deltas ndarray[Py_ssize_t] posn int64_t v trans, deltas, typ = get_dst_info(tz) if not to_utc: # We add `offset` below instead of subtracting it - deltas = -1 * deltas + deltas = -1 * deltas.base # `.base` to access underlying ndarray tt = values[values != NPY_NAT] if not len(tt): @@ -728,7 +730,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): converted: int64 """ cdef: - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas Py_ssize_t pos int64_t v, offset, utc_date npy_datetimestruct dts @@ -843,7 +846,8 @@ def 
tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, localized : ndarray[int64_t] """ cdef: - ndarray[int64_t] trans, deltas, idx_shifted + ndarray[int64_t] trans, idx_shifted + int64_t[:] deltas ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) int64_t *tdata @@ -1124,7 +1128,8 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): cdef: Py_ssize_t n = len(stamps) ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas Py_ssize_t[:] pos npy_datetimestruct dts int64_t delta @@ -1190,7 +1195,7 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil: return dtstruct_to_dt64(dts) -def is_date_array_normalized(ndarray[int64_t] stamps, tz=None): +def is_date_array_normalized(int64_t[:] stamps, tz=None): """ Check if all of the given (nanosecond) timestamps are normalized to midnight, i.e. hour == minute == second == 0. If the optional timezone @@ -1207,7 +1212,8 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None): """ cdef: Py_ssize_t i, n = len(stamps) - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas npy_datetimestruct dts int64_t local_val, delta diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ffa3d8df44be8..afda2046fd12d 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -14,7 +14,6 @@ from cpython.datetime cimport datetime import time import numpy as np -from numpy cimport ndarray # Avoid import from outside _libs if sys.version_info.major == 2: @@ -381,11 +380,11 @@ cpdef object _get_rule_month(object source, object default='DEC'): # Parsing for type-inference -def try_parse_dates(ndarray[object] values, parser=None, +def try_parse_dates(object[:] values, parser=None, dayfirst=False, default=None): cdef: Py_ssize_t i, n - ndarray[object] result + object[:] result n = len(values) result = 
np.empty(n, dtype='O') @@ -420,15 +419,15 @@ def try_parse_dates(ndarray[object] values, parser=None, # raise if passed parser and it failed raise - return result + return result.base # .base to access underlying ndarray -def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, +def try_parse_date_and_time(object[:] dates, object[:] times, date_parser=None, time_parser=None, dayfirst=False, default=None): cdef: Py_ssize_t i, n - ndarray[object] result + object[:] result n = len(dates) if len(times) != n: @@ -457,14 +456,14 @@ def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, result[i] = datetime(d.year, d.month, d.day, t.hour, t.minute, t.second) - return result + return result.base # .base to access underlying ndarray -def try_parse_year_month_day(ndarray[object] years, ndarray[object] months, - ndarray[object] days): +def try_parse_year_month_day(object[:] years, object[:] months, + object[:] days): cdef: Py_ssize_t i, n - ndarray[object] result + object[:] result n = len(years) if len(months) != n or len(days) != n: @@ -474,19 +473,19 @@ def try_parse_year_month_day(ndarray[object] years, ndarray[object] months, for i in range(n): result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) - return result + return result.base # .base to access underlying ndarray -def try_parse_datetime_components(ndarray[object] years, - ndarray[object] months, - ndarray[object] days, - ndarray[object] hours, - ndarray[object] minutes, - ndarray[object] seconds): +def try_parse_datetime_components(object[:] years, + object[:] months, + object[:] days, + object[:] hours, + object[:] minutes, + object[:] seconds): cdef: Py_ssize_t i, n - ndarray[object] result + object[:] result int secs double float_secs double micros @@ -509,7 +508,7 @@ def try_parse_datetime_components(ndarray[object] years, int(hours[i]), int(minutes[i]), secs, int(micros)) - return result + return result.base # .base to access underlying ndarray # 
---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 96d7994bdc822..811f0d25c3838 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -423,13 +423,13 @@ cdef inline int month_to_quarter(int month): @cython.wraparound(False) @cython.boundscheck(False) -def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): +def dt64arr_to_periodarr(int64_t[:] dtarr, int freq, tz=None): """ Convert array of datetime64 values (passed in as 'i8' dtype) to a set of periods corresponding to desired frequency, per period convention. """ cdef: - ndarray[int64_t] out + int64_t[:] out Py_ssize_t i, l npy_datetimestruct dts @@ -447,18 +447,18 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): out[i] = get_period_ordinal(&dts, freq) else: out = localize_dt64arr_to_period(dtarr, freq, tz) - return out + return out.base # .base to access underlying np.ndarray @cython.wraparound(False) @cython.boundscheck(False) -def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): +def periodarr_to_dt64arr(int64_t[:] periodarr, int freq): """ Convert array to datetime64 values from a set of ordinals corresponding to periods per period convention. """ cdef: - ndarray[int64_t] out + int64_t[:] out Py_ssize_t i, l l = len(periodarr) @@ -472,7 +472,7 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): continue out[i] = period_ordinal_to_dt64(periodarr[i], freq) - return out + return out.base # .base to access underlying np.ndarray cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): @@ -556,7 +556,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): if upsampling, choose to use start ('S') or end ('E') of period. 
""" cdef: - ndarray[int64_t] result + int64_t[:] result Py_ssize_t i, n freq_conv_func func asfreq_info af_info @@ -584,7 +584,7 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): raise ValueError("Unable to convert to desired frequency.") result[i] = val - return result + return result.base # .base to access underlying np.ndarray cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, @@ -825,10 +825,10 @@ cdef int pdays_in_month(int64_t ordinal, int freq): return ccalendar.get_days_in_month(dts.year, dts.month) -def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): +def get_period_field_arr(int code, int64_t[:] arr, int freq): cdef: Py_ssize_t i, sz - ndarray[int64_t] out + int64_t[:] out accessor f func = _get_accessor_func(code) @@ -844,7 +844,7 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): continue out[i] = func(arr[i], freq) - return out + return out.base # .base to access underlying np.ndarray cdef accessor _get_accessor_func(int code): @@ -875,10 +875,10 @@ cdef accessor _get_accessor_func(int code): return NULL -def extract_ordinals(ndarray[object] values, freq): +def extract_ordinals(object[:] values, freq): cdef: Py_ssize_t i, n = len(values) - ndarray[int64_t] ordinals = np.empty(n, dtype=np.int64) + int64_t[:] ordinals = np.empty(n, dtype=np.int64) object p freqstr = Period._maybe_convert_freq(freq).freqstr @@ -904,10 +904,10 @@ def extract_ordinals(ndarray[object] values, freq): else: ordinals[i] = p.ordinal - return ordinals + return ordinals.base # .base to access underlying np.ndarray -def extract_freq(ndarray[object] values): +def extract_freq(object[:] values): cdef: Py_ssize_t i, n = len(values) object p @@ -930,12 +930,13 @@ def extract_freq(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, - int freq, object tz): +cdef int64_t[:] 
localize_dt64arr_to_period(int64_t[:] stamps, + int freq, object tz): cdef: Py_ssize_t n = len(stamps) - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans, deltas + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas Py_ssize_t[:] pos npy_datetimestruct dts int64_t local_val diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 0659e2a553e7e..18cc21ccd59e0 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -31,7 +31,7 @@ cdef int RESO_DAY = 6 # ---------------------------------------------------------------------- -cpdef resolution(ndarray[int64_t] stamps, tz=None): +cpdef resolution(int64_t[:] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) npy_datetimestruct dts @@ -42,11 +42,12 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): return _reso_local(stamps, tz) -cdef _reso_local(ndarray[int64_t] stamps, object tz): +cdef _reso_local(int64_t[:] stamps, object tz): cdef: Py_ssize_t i, n = len(stamps) int reso = RESO_DAY, curr_reso - ndarray[int64_t] trans, deltas + ndarray[int64_t] trans + int64_t[:] deltas Py_ssize_t[:] pos npy_datetimestruct dts int64_t local_val, delta diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index de2b7440156a7..59d673881bb40 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -26,7 +26,7 @@ from cython cimport Py_ssize_t from cpython cimport PyFloat_Check import numpy as np -from numpy cimport ndarray, int64_t +from numpy cimport int64_t from datetime import date as datetime_date @@ -60,7 +60,7 @@ cdef dict _parse_code_table = {'y': 0, 'z': 19} -def array_strptime(ndarray[object] values, object fmt, +def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'): """ Calculates the datetime structs represented by the passed array of strings @@ -76,8 +76,8 @@ def array_strptime(ndarray[object] 
values, object fmt, cdef: Py_ssize_t i, n = len(values) npy_datetimestruct dts - ndarray[int64_t] iresult - ndarray[object] result_timezone + int64_t[:] iresult + object[:] result_timezone int year, month, day, minute, hour, second, weekday, julian int week_of_year, week_of_year_start, parse_code, ordinal int64_t us, ns @@ -320,7 +320,7 @@ def array_strptime(ndarray[object] values, object fmt, result_timezone[i] = timezone - return result, result_timezone + return result, result_timezone.base """_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 3e7b88b208e89..2413c281e0a52 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,11 +3,11 @@ from cpython.datetime cimport timedelta -from numpy cimport int64_t, ndarray +from numpy cimport int64_t # Exposed for tslib, not intended for outside use. cdef parse_timedelta_string(object ts) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? 
-1 cpdef convert_to_timedelta64(object ts, object unit) -cpdef array_to_timedelta64(ndarray[object] values, unit=*, errors=*) +cpdef array_to_timedelta64(object[:] values, unit=*, errors=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7a6cf0c6dafc..9e7f1d94934ba 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -13,7 +13,7 @@ from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp -from numpy cimport int64_t, ndarray +from numpy cimport int64_t cnp.import_array() from cpython.datetime cimport (datetime, timedelta, @@ -83,7 +83,7 @@ _no_input = object() # ---------------------------------------------------------------------- # API -def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): +def ints_to_pytimedelta(int64_t[:] arr, box=False): """ convert an i8 repr to an ndarray of timedelta or Timedelta (if box == True) @@ -101,7 +101,7 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): cdef: Py_ssize_t i, n = len(arr) int64_t value - ndarray[object] result = np.empty(n, dtype=object) + object[:] result = np.empty(n, dtype=object) for i in range(n): @@ -114,7 +114,7 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): else: result[i] = timedelta(microseconds=int(value) / 1000) - return result + return result.base # .base to access underlying np.ndarray # ---------------------------------------------------------------------- @@ -199,7 +199,7 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): +cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. 
@@ -207,7 +207,7 @@ cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): cdef: Py_ssize_t i, n - ndarray[int64_t] iresult + int64_t[:] iresult if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -233,7 +233,7 @@ cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): else: raise - return iresult + return iresult.base # .base to access underlying np.ndarray cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index be988e7247e59..eb5c0076a868a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -7,7 +7,7 @@ from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, import numpy as np cimport numpy as cnp -from numpy cimport int64_t, int32_t, ndarray +from numpy cimport int64_t, int32_t, int8_t cnp.import_array() from datetime import time as datetime_time @@ -342,7 +342,7 @@ cdef class _Timestamp(datetime): cdef: int64_t val dict kwds - ndarray out + int8_t out[1] int month_kw freq = self.freq @@ -362,7 +362,7 @@ cdef class _Timestamp(datetime): cpdef _get_date_name_field(self, object field, object locale): cdef: int64_t val - ndarray out + object[:] out val = self._maybe_convert_value_to_local() out = get_date_name_field(np.array([val], dtype=np.int64), diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 2e3b07252d45e..a787452d90c07 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -cimport cython from cython cimport Py_ssize_t # dateutil compat @@ -19,7 +18,7 @@ UTC = pytz.utc import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t +from numpy cimport int64_t cnp.import_array() # ---------------------------------------------------------------------- @@ 
-188,10 +187,10 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cpdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo): +cpdef int64_t[:] unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz - ndarray[int64_t] arr + int64_t[:] arr sz = len(transinfo) arr = np.empty(sz, dtype='i8') From 1b6d568d715bcbfd1db73c148e3fd287d7fe36f5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 31 Jul 2018 15:02:35 -0700 Subject: [PATCH 2/2] change more usages of ndarray to memoryview --- pandas/_libs/tslib.pyx | 4 +--- pandas/_libs/tslibs/conversion.pyx | 36 ++++++++++++++---------------- pandas/_libs/tslibs/fields.pyx | 5 ++--- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 18947166a9d84..eba553bfaeb48 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- # cython: profile=False -cimport cython from cython cimport Py_ssize_t from cpython cimport PyFloat_Check, PyUnicode_Check @@ -37,8 +36,7 @@ from tslibs.np_datetime import OutOfBoundsDatetime from tslibs.parsing import parse_datetime_string from tslibs.timedeltas cimport cast_from_unit -from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_pytz, get_dst_info) +from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, get_datetime64_nanos, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 805f2eb276372..a459b185fa48c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -91,7 +91,7 @@ def ensure_datetime64ns(ndarray arr, copy=True): """ cdef: Py_ssize_t i, n = arr.size - ndarray[int64_t] ivalues, iresult + int64_t[:] ivalues, iresult NPY_DATETIMEUNIT unit npy_datetimestruct dts @@ -139,7 +139,7 @@ def ensure_timedelta64ns(ndarray arr, copy=True): return
arr.astype(TD_DTYPE, copy=copy) -def datetime_to_datetime64(ndarray[object] values): +def datetime_to_datetime64(object[:] values): """ Convert ndarray of datetime-like objects to int64 array representing nanosecond timestamps. @@ -156,7 +156,7 @@ def datetime_to_datetime64(ndarray[object] values): cdef: Py_ssize_t i, n = len(values) object val, inferred_tz = None - ndarray[int64_t] iresult + int64_t[:] iresult npy_datetimestruct dts _TSObject _ts @@ -632,16 +632,16 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz, cdef: Py_ssize_t n = len(values) Py_ssize_t i, j, pos - ndarray[int64_t] result = np.empty(n, dtype=np.int64) + int64_t[:] result = np.empty(n, dtype=np.int64) ndarray[int64_t] tt, trans int64_t[:] deltas - ndarray[Py_ssize_t] posn + Py_ssize_t[:] posn int64_t v trans, deltas, typ = get_dst_info(tz) if not to_utc: # We add `offset` below instead of subtracting it - deltas = -1 * deltas.base # `.base` to access underlying ndarray + deltas = -1 * np.array(deltas, dtype='i8') tt = values[values != NPY_NAT] if not len(tt): @@ -730,7 +730,6 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): converted: int64 """ cdef: - ndarray[int64_t] trans int64_t[:] deltas Py_ssize_t pos int64_t v, offset, utc_date @@ -759,7 +758,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): else: # Convert UTC to other timezone arr = np.array([utc_date]) - # Note: at least with cython 0.28.3, doing a looking `[0]` in the next + # Note: at least with cython 0.28.3, doing a lookup `[0]` in the next # line is sensitive to the declared return type of _tz_convert_dst; # if it is declared as returning ndarray[int64_t], a compile-time error # is raised. 
@@ -784,10 +783,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): """ cdef: - ndarray[int64_t] utc_dates, tt, result, trans, deltas + ndarray[int64_t] utc_dates, result Py_ssize_t i, j, pos, n = len(vals) - int64_t v, offset, delta - npy_datetimestruct dts + int64_t v if len(vals) == 0: return np.array([], dtype=np.int64) @@ -846,8 +844,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, localized : ndarray[int64_t] """ cdef: - ndarray[int64_t] trans, idx_shifted - int64_t[:] deltas + ndarray[int64_t] trans + int64_t[:] deltas, idx_shifted ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) int64_t *tdata @@ -1073,7 +1071,7 @@ def normalize_date(object dt): @cython.wraparound(False) @cython.boundscheck(False) -def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None): +def normalize_i8_timestamps(int64_t[:] stamps, tz=None): """ Normalize each of the (nanosecond) timestamps in the given array by rounding down to the beginning of the day (i.e. midnight). If `tz` @@ -1091,7 +1089,7 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) npy_datetimestruct dts - ndarray[int64_t] result = np.empty(n, dtype=np.int64) + int64_t[:] result = np.empty(n, dtype=np.int64) if tz is not None: tz = maybe_get_tz(tz) @@ -1105,12 +1103,12 @@ def normalize_i8_timestamps(ndarray[int64_t] stamps, tz=None): dt64_to_dtstruct(stamps[i], &dts) result[i] = _normalized_stamp(&dts) - return result + return result.base # .base to access underlying np.ndarray @cython.wraparound(False) @cython.boundscheck(False) -cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): +cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz): """ Normalize each of the (nanosecond) timestamps in the given array by rounding down to the beginning of the day (i.e. 
midnight) for the @@ -1127,7 +1125,7 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): """ cdef: Py_ssize_t n = len(stamps) - ndarray[int64_t] result = np.empty(n, dtype=np.int64) + int64_t[:] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans int64_t[:] deltas Py_ssize_t[:] pos @@ -1211,7 +1209,7 @@ def is_date_array_normalized(int64_t[:] stamps, tz=None): is_normalized : bool True if all stamps are normalized """ cdef: - Py_ssize_t i, n = len(stamps) + Py_ssize_t pos, i, n = len(stamps) ndarray[int64_t] trans int64_t[:] deltas npy_datetimestruct dts diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index a298f521ef853..96f023f7fdafe 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -85,8 +85,7 @@ def build_field_sarray(ndarray[int64_t] dtindex): @cython.wraparound(False) @cython.boundscheck(False) -def get_date_name_field(ndarray[int64_t] dtindex, object field, - object locale=None): +def get_date_name_field(int64_t[:] dtindex, object field, object locale=None): """ Given a int64-based datetime index, return array of strings of date name based on requested field (e.g. weekday_name) @@ -134,7 +133,7 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field, @cython.wraparound(False) -def get_start_end_field(ndarray[int64_t] dtindex, object field, +def get_start_end_field(int64_t[:] dtindex, object field, object freqstr=None, int month_kw=12): """ Given an int64-based datetime index return array of indicators