diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index 0888cf3c85f2f..5df1e381ea3ce 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,9 +1,6 @@ from util cimport numeric -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil - - cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: cdef: numeric t diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 075e2c5129579..e77899507833f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -15,8 +15,7 @@ from numpy cimport (ndarray, NPY_FLOAT32, NPY_FLOAT64, NPY_OBJECT, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, - double_t) + uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -32,10 +31,9 @@ import missing cdef float64_t FP_ERR = 1e-13 -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() tiebreakers = { 'average': TIEBREAK_AVERAGE, @@ -199,7 +197,7 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): @cython.boundscheck(False) @cython.wraparound(False) -cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil: +def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: cdef: Py_ssize_t i, j, l, m, n = a.shape[0] numeric x @@ -812,7 +810,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): n = len(arr) if n == 1: - if arr[0] != arr[0] or (timelike and arr[0] == iNaT): + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): # single value is NaN return False, False, True else: @@ -820,7 +818,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): elif n < 2: return True, True, True - if timelike and arr[0] == iNaT: + if timelike and arr[0] == NPY_NAT: return False, False, True if algos_t is not object: @@ -828,7 +826,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break @@ -853,7 +851,7 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): prev = arr[0] for i in range(1, n): cur = arr[i] - if timelike and cur == iNaT: + if timelike and cur == NPY_NAT: is_monotonic_inc = 0 is_monotonic_dec = 0 break diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index c2b0a4119e6e5..3708deb1a4b76 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -84,9 +84,9 @@ def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # ensure_dtype -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index fcb052e8be63b..4d144dcf2808a 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -74,9 +74,9 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT - # create copy in case of iNaT + # create copy in case of NPY_NAT # values are mutated inplace if mask.any(): values = values.copy() @@ -149,7 +149,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', {{if dtype != 'uint64'}} isnan = sorted_mask[i] if isnan and keep_na: - ranks[argsorted[i]] = nan + ranks[argsorted[i]] = NaN continue {{endif}} @@ -257,7 +257,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} - mask = values == iNaT + mask = values == NPY_NAT {{endif}} np.putmask(values, mask, nan_value) @@ -317,7 +317,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', {{else}} if (val == nan_value) and keep_na: {{endif}} - ranks[i, argsorted[i, j]] = nan + ranks[i, argsorted[i, j]] = NaN {{if dtype == 'object'}} infs += 1 diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in index bd5feef1ff2b0..2fea8b17fd9d7 100644 --- a/pandas/_libs/algos_take_helper.pxi.in +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for take WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # take_1d, take_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 83ded64b742ed..7c16b29f3e42b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,14 +1,13 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp from numpy cimport (ndarray, - double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() @@ -20,10 +19,9 @@ from algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, TIEBREAK_DENSE) from algos import take_2d_axis1_float64_float64, groupsort_indexer, tiebreakers -cdef int64_t iNaT = get_nat() +cdef int64_t NPY_NAT = get_nat() -cdef double NaN = np.NaN -cdef double nan = NaN +cdef float64_t NaN = np.NaN cdef inline float64_t median_linear(float64_t* a, int n) nogil: @@ -67,13 +65,13 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result -# TODO: Is this redundant with algos.kth_smallest? +# TODO: Is this redundant with algos.kth_smallest cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: cdef: Py_ssize_t i, j, l, m - double_t x, t + float64_t x, t l = 0 m = n - 1 @@ -109,7 +107,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, cdef: Py_ssize_t i, j, N, K, ngroups, size ndarray[int64_t] _counts - ndarray data + ndarray[float64_t, ndim=2] data float64_t* ptr assert min_count == -1, "'min_count' only used in add and prod" @@ -139,8 +137,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) def group_cumprod_float64(float64_t[:, :] out, - float64_t[:, :] values, - int64_t[:] labels, + const float64_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike, bint skipna=True): """ @@ -177,7 +175,7 @@ def group_cumprod_float64(float64_t[:, :] out, @cython.wraparound(False) def group_cumsum(numeric[:, :] out, numeric[:, :] values, - int64_t[:] labels, + const int64_t[:] labels, is_datetimelike, bint skipna=True): """ @@ -217,7 +215,7 @@ def group_cumsum(numeric[:, :] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, +def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, int ngroups, int periods): cdef: Py_ssize_t N, i, j, ii @@ -291,7 +289,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, """ cdef: Py_ssize_t i, N - ndarray[int64_t] sorted_labels + int64_t[:] sorted_labels int64_t idx, curr_fill_idx=-1, filled_vals=0 N = len(out) @@ -327,10 +325,10 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, @cython.boundscheck(False) @cython.wraparound(False) -def group_any_all(ndarray[uint8_t] out, - ndarray[int64_t] labels, - ndarray[uint8_t] values, - ndarray[uint8_t] mask, +def group_any_all(uint8_t[:] out, + const int64_t[:] labels, + const uint8_t[:] values, + const uint8_t[:] mask, object val_test, bint skipna): """Aggregated boolean values to show truthfulness of group elements diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 484a4b069305f..523d43f893aad 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max # ---------------------------------------------------------------------- @@ -268,16 +268,16 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, {{endfor}} -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_nth, group_last, group_rank -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: # name, c_type, nan_val dtypes = [('float64', 'float64_t', 'NAN'), ('float32', 'float32_t', 'NAN'), - ('int64', 'int64_t', 'iNaT'), + ('int64', 'int64_t', 'NPY_NAT'), ('object', 'object', 'NAN')] def get_dispatch(dtypes): @@ -527,7 +527,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # to the result where appropriate if keep_na and mask[_as[i]]: for j in range(i - dups + 1, i + 1): - out[_as[j], 0] = nan + out[_as[j], 0] = NaN grp_na_count = dups elif tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): @@ -630,7 +630,7 @@ def group_max(ndarray[groupby_t, ndim=2] out, if groupby_t is int64_t: # Note: evaluated at compile-time maxx[:] = -_int64_max - nan_val = iNaT + nan_val = NPY_NAT else: maxx[:] = -np.inf nan_val = NAN @@ -692,7 +692,7 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: minx[:] = _int64_max - nan_val = iNaT + nan_val = NPY_NAT else: minx[:] = np.inf nan_val = NAN @@ -762,8 +762,8 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, # val = nan if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val < mval: @@ -809,8 +809,8 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, val = values[i, j] if groupby_t is int64_t: - if is_datetimelike and val == iNaT: - out[i, j] = iNaT + if is_datetimelike and val == NPY_NAT: + out[i, j] = NPY_NAT else: mval = accum[lab, j] if val > mval: diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index d38b72ccebbb2..9aa887727a765 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -9,11 +9,11 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint32_t +from numpy cimport ndarray, uint8_t, uint32_t, float64_t cnp.import_array() cdef extern from "numpy/npy_math.h": - double NAN "NPY_NAN" + float64_t NAN "NPY_NAN" from khash cimport ( @@ -42,9 +42,7 @@ cimport util from missing cimport checknull -nan = np.nan - -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 36ed8a88aa78b..a71023ed34f44 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -251,9 +251,9 @@ cdef class HashTable: {{py: # name, dtype, float_group, default_na_value -dtypes = [('Float64', 'float64', True, 'nan'), +dtypes = [('Float64', 'float64', True, 'np.nan'), ('UInt64', 'uint64', False, 0), - ('Int64', 'int64', False, 'iNaT')] + ('Int64', 'int64', False, 'NPY_NAT')] }} diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index d418ac63a4ac8..7930f583274b5 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -25,7 +25,7 @@ from pandas._libs import algos, hashtable as _hash from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib from pandas._libs.missing import checknull -cdef int64_t iNaT = util.get_nat() +cdef int64_t NPY_NAT = util.get_nat() cdef inline bint is_definitely_invalid_key(object val): @@ -520,7 +520,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timestamp(value).value raise ValueError("cannot set a Timestamp with a non-timestamp") @@ -531,7 +531,7 @@ cpdef convert_scalar(ndarray arr, object value): elif isinstance(value, timedelta): return Timedelta(value).value elif value is None or value != value: - return iNaT + return NPY_NAT elif util.is_string_object(value): return Timedelta(value).value raise ValueError("cannot set a Timedelta with a non-timedelta") diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index c19812efaaa35..ff95917f6643a 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -4,9 +4,9 @@ Template for functions of IndexEngine subclasses. WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IndexEngine Subclass Methods -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a395fdbabeca2..dae88d3b707bf 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,20 +1,27 @@ # -*- coding: utf-8 -*- import numbers +from operator import le, lt from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, PyObject_RichCompare) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np -from numpy cimport ndarray +cimport numpy as cnp +from numpy cimport ( + int64_t, int32_t, float64_t, float32_t, uint64_t, + ndarray, + PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) +cnp.import_array() -from operator import le, lt cimport util util.import_array() +from hashtable cimport Int64Vector, Int64VectorData + from tslibs import Timestamp from tslibs.timezones cimport tz_compare diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index f9427fbbcd900..aa53f5086b894 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -4,21 +4,6 @@ Template for intervaltree WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -from numpy cimport ( - int64_t, int32_t, float64_t, float32_t, uint64_t, - ndarray, - PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) -import numpy as np - -cimport cython -from cython cimport Py_ssize_t - -cimport numpy as cnp -cnp.import_array() - -from hashtable cimport Int64Vector, Int64VectorData - - ctypedef fused scalar_t: float64_t float32_t @@ -26,10 +11,9 @@ ctypedef fused scalar_t: int32_t uint64_t - -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # IntervalTree -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- cdef class IntervalTree(IntervalMixin): """A centered interval tree @@ -203,9 +187,10 @@ cdef sort_values_and_indices(all_values, all_indices, subset): sorted_indices = take(indices, sorter) return sorted_values, sorted_indices -#---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- # Nodes -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # we need specialized nodes and leaves to optimize for different dtype and # closed values diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 748f3f265dd34..54dfeeff1452d 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -10,10 +10,6 @@ from numpy cimport (ndarray, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() - -cdef double NaN = np.NaN -cdef double nan = NaN - from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd @@ -673,7 +669,7 @@ ctypedef fused asof_t: int32_t int64_t float - double + float64_t ctypedef fused by_t: object diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a9e0fcbc4a826..cfc60256e97a3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,13 +45,14 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "src/parse_helper.h": - int floatify(object, double *result, int *maybe_int) except -1 + int floatify(object, float64_t *result, int *maybe_int) except -1 cimport util from util cimport (is_nan, UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) from tslib import array_to_datetime +from tslibs.nattype cimport NPY_NAT from tslibs.nattype import NaT from tslibs.conversion cimport convert_to_tsobject from tslibs.timedeltas cimport convert_to_timedelta64 @@ -67,11 +68,8 @@ cdef object oINT64_MAX = INT64_MAX cdef object oINT64_MIN = INT64_MIN cdef object oUINT64_MAX = UINT64_MAX -cdef int64_t NPY_NAT = util.get_nat() -iNaT = util.get_nat() - cdef bint PY2 = sys.version_info[0] == 2 -cdef double nan = np.NaN +cdef float64_t NaN = np.NaN def values_from_object(obj: object): @@ -104,7 +102,7 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: # ---------------------------------------------------------------------- -def is_scalar(val: object) -> bint: +def is_scalar(val: object) -> bool: """ Return True if given value is scalar. @@ -628,7 +626,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, nat_count = 0 if hasnans: - mask = values == iNaT + mask = values == NPY_NAT nat_count = np.sum(mask) values = values[~mask] @@ -1816,7 +1814,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if val.__hash__ is not None and val in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif util.is_float_object(val): fval = val if fval != fval: @@ -1847,11 +1845,11 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.bool_ = True elif val is None: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN elif hasattr(val, '__len__') and len(val) == 0: if convert_empty or seen.coerce_numeric: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: raise ValueError('Empty string encountered') elif util.is_complex_object(val): @@ -1866,7 +1864,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, if fval in na_values: seen.saw_null() - floats[i] = complexes[i] = nan + floats[i] = complexes[i] = NaN else: if fval != fval: seen.null_ = True @@ -1899,7 +1897,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, elif "uint64" in str(e): # Exception from check functions. raise seen.saw_null() - floats[i] = nan + floats[i] = NaN if seen.check_uint64_conflict(): return values @@ -1967,10 +1965,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats[i] = complexes[i] = fnan elif val is NaT: if convert_datetime: - idatetimes[i] = iNaT + idatetimes[i] = NPY_NAT seen.datetime_ = 1 if convert_timedelta: - itimedeltas[i] = iNaT + itimedeltas[i] = NPY_NAT seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b8791359241ad..1fdb04dd10d8e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -5,16 +5,17 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t +from numpy cimport ndarray, int64_t, uint8_t, float64_t cnp.import_array() cimport util from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value +from tslibs.nattype cimport checknull_with_nat from tslibs.nattype import NaT -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef int64_t NPY_NAT = util.get_nat() @@ -295,9 +296,7 @@ def isneginf_scalar(val: object) -> bool: cdef inline bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True elif util.is_datetime64_object(v): return v.view('int64') == NPY_NAT @@ -307,9 +306,7 @@ cdef inline bint is_null_datetime64(v): cdef inline bint is_null_timedelta64(v): # determine if we have a null for a timedelta (or integer versions), # excluding np.datetime64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: + if checknull_with_nat(v): return True elif util.is_timedelta64_object(v): return v.view('int64') == NPY_NAT @@ -319,8 +316,4 @@ cdef inline bint is_null_timedelta64(v): cdef inline bint is_null_period(v): # determine if we have a null for a Period (or integer versions), # excluding np.datetime64('nat') and np.timedelta64('nat') - if v is None or util.is_nan(v): - return True - elif v is NaT: - return True - return False + return checknull_with_nat(v) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 391de339ad60e..3870a55c22fd6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -65,8 +65,8 @@ CParserError = ParserError cdef bint PY3 = (sys.version_info[0] >= 3) -cdef double INF = np.inf -cdef double NEGINF = -INF +cdef float64_t INF = np.inf +cdef float64_t NEGINF = -INF cdef extern from "errno.h": @@ -182,10 +182,10 @@ cdef extern from "parser/tokenizer.h": int64_t skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - double (*double_converter_nogil)(const char *, char **, - char, char, char, int) nogil - double (*double_converter_withgil)(const char *, char **, - char, char, char, int) + float64_t (*double_converter_nogil)(const char *, char **, + char, char, char, int) nogil + float64_t (*double_converter_withgil)(const char *, char **, + char, char, char, int) # error handling char *warn_msg @@ -233,12 +233,12 @@ cdef extern from "parser/tokenizer.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - double xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double precise_xstrtod(const char *p, char **q, char decimal, char sci, - char tsep, int skip_trailing) nogil - double round_trip(const char *p, char **q, char decimal, char sci, + float64_t xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil + float64_t round_trip(const char *p, char **q, char decimal, char sci, + char tsep, int skip_trailing) nogil int to_boolean(const char *item, uint8_t *val) nogil @@ -1697,8 +1697,8 @@ cdef _try_double(parser_t *parser, int64_t col, coliter_t it const char *word = NULL char *p_end - double *data - double NA = na_values[np.float64] + float64_t *data + float64_t NA = na_values[np.float64] kh_float64_t *na_fset ndarray result khiter_t k @@ -1706,7 +1706,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = result.data + data = result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1717,8 +1717,8 @@ cdef _try_double(parser_t *parser, int64_t col, else: assert parser.double_converter_withgil != NULL error = _try_double_nogil(parser, - parser.double_converter_withgil, col, line_start, line_end, na_filter, na_hashset, use_na_flist, @@ -1730,14 +1730,14 @@ cdef _try_double(parser_t *parser, int64_t col, cdef inline int _try_double_nogil(parser_t *parser, - double (*double_converter)( + float64_t (*double_converter)( const char *, char **, char, char, char, int) nogil, int col, int line_start, int line_end, bint na_filter, kh_str_t *na_hashset, bint use_na_flist, const kh_float64_t *na_flist, - double NA, double *data, + float64_t NA, float64_t *data, int *na_count) nogil: cdef: int error, diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index b8ca744ac88c4..668bd0ae6bbb7 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -22,9 +22,6 @@ _np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') cdef float64_t NaN = np.NaN cdef float64_t INF = np.inf -cdef inline int int_max(int a, int b): return a if a >= b else b -cdef inline int int_min(int a, int b): return a if a <= b else b - # ----------------------------------------------------------------------------- @@ -673,13 +670,6 @@ cdef class BlockMerge(object): self.yi = xi -cdef class BlockIntersection(BlockMerge): - """ - not done yet - """ - pass - - cdef class BlockUnion(BlockMerge): """ Object-oriented approach makes sharing state between recursive functions a @@ -805,63 +795,6 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" -# ----------------------------------------------------------------------------- -# Indexing operations - -def get_reindexer(ndarray[object, ndim=1] values, dict index_map): - cdef: - object idx - Py_ssize_t i - Py_ssize_t new_length = len(values) - ndarray[int32_t, ndim=1] indexer - - indexer = np.empty(new_length, dtype=np.int32) - - for i in range(new_length): - idx = values[i] - if idx in index_map: - indexer[i] = index_map[idx] - else: - indexer[i] = -1 - - return indexer - -# def reindex_block(ndarray[float64_t, ndim=1] values, -# BlockIndex sparse_index, -# ndarray[int32_t, ndim=1] indexer): -# cdef: -# Py_ssize_t i, length -# ndarray[float64_t, ndim=1] out - -# out = np.empty(length, dtype=np.float64) - -# for i in range(length): -# if indexer[i] == -1: -# pass - - -# cdef class SparseCruncher(object): -# """ -# Class to acquire float pointer for convenient operations on sparse data -# structures -# """ -# cdef: -# SparseIndex index -# float64_t* buf - -# def __init__(self, ndarray[float64_t, ndim=1, mode='c'] values, -# SparseIndex index): - -# self.index = index -# self.buf = values.data - - -def reindex_integer(ndarray[float64_t, ndim=1] values, - IntIndex sparse_index, - ndarray[int32_t, ndim=1] indexer): - pass - - # ----------------------------------------------------------------------------- # SparseArray mask create operations diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index d02a985de1d61..1f41096a3f194 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for sparse ops WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # Sparse op -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- ctypedef fused sparse_t: float64_t diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9012ebefe0975..e346eb7e598ed 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import cython from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, @@ -37,7 +38,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, get_datetime64_nanos, tz_convert_utc_to_tzlocal) -from tslibs.nattype import NaT, nat_strings, iNaT +# many modules still look for NaT and iNaT here despite them not being needed +from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821 from tslibs.nattype cimport checknull_with_nat, NPY_NAT from tslibs.offsets cimport to_offset @@ -71,6 +73,8 @@ cdef inline object create_time_from_ts( return time(dts.hour, dts.min, dts.sec, dts.us, tz) +@cython.wraparound(False) +@cython.boundscheck(False) def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -213,6 +217,8 @@ def _test_parse_iso8601(object ts): return Timestamp(obj.value) +@cython.wraparound(False) +@cython.boundscheck(False) def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None): """ @@ -335,7 +341,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): # then need to iterate try: iresult = values.astype('i8', casting='same_kind', copy=False) - mask = iresult == iNaT + mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False @@ -351,7 +357,7 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): "'{unit}'".format(unit=unit)) result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') - iresult[mask] = iNaT + iresult[mask] = NPY_NAT return result result = np.empty(n, dtype='M8[ns]') @@ -449,6 +455,8 @@ def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): return oresult +@cython.wraparound(False) +@cython.boundscheck(False) cpdef array_to_datetime(ndarray[object] values, errors='raise', dayfirst=False, yearfirst=False, format=None, utc=None, @@ -752,6 +760,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) +@cython.wraparound(False) +@cython.boundscheck(False) cdef array_to_datetime_object(ndarray[object] values, bint is_raise, dayfirst=False, yearfirst=False): """ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e2914957d01cd..457f5003cb9a5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import enum import warnings from cpython cimport (PyObject_RichCompareBool, PyObject_RichCompare, @@ -23,7 +24,6 @@ cimport ccalendar from conversion import tz_localize_to_utc, normalize_i8_timestamps from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) -import enum from fields import get_start_end_field, get_date_name_field from nattype import NaT from nattype cimport NPY_NAT diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index bb7af67d14585..f517e0933264a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -9,15 +9,15 @@ from libc.stdlib cimport malloc, free import numpy as np cimport numpy as cnp -from numpy cimport ndarray, double_t, int64_t, float64_t, float32_t +from numpy cimport ndarray, int64_t, float64_t, float32_t cnp.import_array() cdef extern from "src/headers/cmath" namespace "std": - bint isnan(double) nogil - bint notnan(double) nogil - int signbit(double) nogil - double sqrt(double x) nogil + bint isnan(float64_t) nogil + bint notnan(float64_t) nogil + int signbit(float64_t) nogil + float64_t sqrt(float64_t x) nogil cimport util from util cimport numeric @@ -32,7 +32,7 @@ cdef float64_t MINfloat64 = np.NINF cdef float32_t MAXfloat32 = np.inf cdef float64_t MAXfloat64 = np.inf -cdef double NaN = np.NaN +cdef float64_t NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -80,6 +80,7 @@ def _check_minp(win, minp, N, floor=None): return max(minp, floor) + # original C implementation by N. Devillard. # This code in public domain. # Function : kth_smallest() @@ -352,19 +353,20 @@ def get_window_indexer(values, win, minp, index, closed, right_closed, index, floor) return indexer.get_data() + # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware -def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_count(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, count_x = 0.0 + float64_t val, count_x = 0.0 int64_t s, e, nobs, N Py_ssize_t i, j ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, _ = get_window_indexer(values, win, minp, index, closed) @@ -406,12 +408,15 @@ def roll_count(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling sum -cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, + float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: result = sum_x @@ -421,7 +426,7 @@ cdef inline double calc_sum(int64_t minp, int64_t nobs, double sum_x) nogil: return result -cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogil: """ add a value from the sum calc """ # Not NaN @@ -430,7 +435,8 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] + val -cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: +cdef inline void remove_sum(float64_t val, + int64_t *nobs, float64_t *sum_x) nogil: """ remove a value from the sum calc """ if notnan(val): @@ -438,15 +444,15 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: sum_x[0] = sum_x[0] - val -def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_sum(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, sum_x = 0 + float64_t val, prev_x, sum_x = 0 int64_t s, e, range_endpoint int64_t nobs = 0, i, j, N bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -511,16 +517,18 @@ def roll_sum(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling mean -cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, - Py_ssize_t neg_ct, double sum_x) nogil: - cdef double result +cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, + Py_ssize_t neg_ct, float64_t sum_x) nogil: + cdef: + float64_t result if nobs >= minp: - result = sum_x / nobs + result = sum_x / nobs if neg_ct == 0 and result < 0: # all positive result = 0 @@ -534,7 +542,7 @@ cdef inline double calc_mean(int64_t minp, Py_ssize_t nobs, return result -cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ add a value from the mean calc """ @@ -546,7 +554,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] + 1 -cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, +cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ @@ -557,15 +565,15 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, neg_ct[0] = neg_ct[0] - 1 -def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_mean(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev_x, result, sum_x = 0 + float64_t val, prev_x, result, sum_x = 0 int64_t s, e bint is_variable Py_ssize_t nobs = 0, i, j, neg_ct = 0, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -627,13 +635,15 @@ def roll_mean(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling variance -cdef inline double calc_var(int64_t minp, int ddof, double nobs, - double ssqdm_x) nogil: - cdef double result +cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs, + float64_t ssqdm_x) nogil: + cdef: + float64_t result # Variance is unchanged if no observation is added or removed if (nobs >= minp) and (nobs > ddof): @@ -642,7 +652,7 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, if nobs == 1: result = 0 else: - result = ssqdm_x / (nobs - ddof) + result = ssqdm_x / (nobs - ddof) if result < 0: result = 0 else: @@ -651,10 +661,12 @@ cdef inline double calc_var(int64_t minp, int ddof, double nobs, return result -cdef inline void add_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ add a value from the var calc """ - cdef double delta + cdef: + float64_t delta + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug if isnan(val): return @@ -667,10 +679,11 @@ cdef inline void add_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] -cdef inline void remove_var(double val, double *nobs, double *mean_x, - double *ssqdm_x) nogil: +cdef inline void remove_var(float64_t val, float64_t *nobs, float64_t *mean_x, + float64_t *ssqdm_x) nogil: """ remove a value from the var calc """ - cdef double delta + cdef: + float64_t delta if notnan(val): nobs[0] = nobs[0] - 1 @@ -685,18 +698,19 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, ssqdm_x[0] = 0 -def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_var(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. """ cdef: - double val, prev, mean_x = 0, ssqdm_x = 0, nobs = 0, delta, mean_x_old + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old int64_t s, e bint is_variable Py_ssize_t i, j, N ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -785,13 +799,15 @@ def roll_var(ndarray[double_t] values, int64_t win, int64_t minp, # ---------------------------------------------------------------------- # Rolling skewness -cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, - double xxx) nogil: - cdef double result, dnobs - cdef double A, B, C, R +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs B = xx / dnobs - A * A C = xxx / dnobs - A * A * A - 3 * A * B @@ -817,8 +833,9 @@ cdef inline double calc_skew(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ add a value from the skew calc """ # Not NaN @@ -831,8 +848,9 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] + val * val * val -cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, - double *xxx) nogil: +cdef inline void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx) nogil: """ remove a value from the skew calc """ # Not NaN @@ -845,16 +863,16 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, xxx[0] = xxx[0] - val * val * val -def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_skew(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -915,17 +933,20 @@ def roll_skew(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling kurtosis -cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, - double xxx, double xxxx) nogil: - cdef double result, dnobs - cdef double A, B, C, D, R, K +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K if nobs >= minp: - dnobs = nobs + dnobs = nobs A = x / dnobs R = A * A B = xx / dnobs - R @@ -954,8 +975,9 @@ cdef inline double calc_kurt(int64_t minp, int64_t nobs, double x, double xx, return result -cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ add a value from the kurotic calc """ # Not NaN @@ -969,8 +991,9 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] + val * val * val * val -cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, - double *xxx, double *xxxx) nogil: +cdef inline void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx) nogil: """ remove a value from the kurotic calc """ # Not NaN @@ -984,16 +1007,16 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, xxxx[0] = xxxx[0] - val * val * val * val -def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, +def roll_kurt(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, prev - double x = 0, xx = 0, xxx = 0, xxxx = 0 + float64_t val, prev + float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 int64_t nobs = 0, i, j, N int64_t s, e bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output start, end, N, win, minp, is_variable = get_window_indexer(values, win, minp, index, @@ -1050,6 +1073,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, return output + # ---------------------------------------------------------------------- # Rolling median, min, max @@ -1057,7 +1081,7 @@ def roll_kurt(ndarray[double_t] values, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, object index, object closed): cdef: - double val, res, prev + float64_t val, res, prev bint err = 0, is_variable int ret = 0 skiplist_t *sl @@ -1065,7 +1089,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, int64_t nobs = 0, N, s, e int midpoint ndarray[int64_t] start, end - ndarray[double_t] output + ndarray[float64_t] output # we use the Fixed/Variable Indexer here as the # actual skiplist ops outweigh any window computation costs @@ -1130,6 +1154,7 @@ def roll_median_c(ndarray[float64_t] values, int64_t win, int64_t minp, raise MemoryError("skiplist_insert failed") return output + # ---------------------------------------------------------------------- # Moving maximum / minimum code taken from Bottleneck under the terms @@ -1167,7 +1192,8 @@ cdef inline void remove_mm(numeric aold, Py_ssize_t *nobs) nogil: cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, numeric value) nogil: - cdef numeric result + cdef: + numeric result if numeric in cython.floating: if nobs >= minp: @@ -1252,7 +1278,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values, Py_ssize_t nobs = 0 deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) Q = deque[int64_t]() @@ -1335,7 +1361,7 @@ cdef _roll_min_max_fixed(ndarray[numeric] values, numeric* minvalue numeric* end numeric* last - ndarray[double_t, ndim=1] output + ndarray[float64_t, ndim=1] output output = np.empty(N, dtype=float) # setup the rings of death! @@ -1427,19 +1453,19 @@ interpolation_types = { def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, int64_t minp, object index, object closed, - double quantile, str interpolation): + float64_t quantile, str interpolation): """ O(N log(window)) implementation using skip list """ cdef: - double val, prev, midpoint, idx_with_fraction + float64_t val, prev, midpoint, idx_with_fraction skiplist_t *skiplist int64_t nobs = 0, i, j, s, e, N Py_ssize_t idx bint is_variable ndarray[int64_t] start, end - ndarray[double_t] output - double vlow, vhigh + ndarray[float64_t] output + float64_t vlow, vhigh InterpolationType interpolation_type int ret = 0 @@ -1529,7 +1555,7 @@ def roll_quantile(ndarray[float64_t, cast=True] values, int64_t win, elif interpolation_type == MIDPOINT: vlow = skiplist_get(skiplist, idx, &ret) vhigh = skiplist_get(skiplist, idx + 1, &ret) - output[i] = (vlow + vhigh) / 2 + output[i] = (vlow + vhigh) / 2 else: output[i] = NaN @@ -1543,7 +1569,7 @@ def roll_generic(object obj, int offset, object func, bint raw, object args, object kwargs): cdef: - ndarray[double_t] output, counts, bufarr + ndarray[float64_t] output, counts, bufarr ndarray[float64_t, cast=True] arr float64_t *buf float64_t *oldbuf @@ -1642,7 +1668,7 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, Assume len(weights) << len(values) """ cdef: - ndarray[double_t] output, tot_wgt, counts + ndarray[float64_t] output, tot_wgt, counts Py_ssize_t in_i, win_i, win_n, win_k, in_n, in_k float64_t val_in, val_win, c, w @@ -1703,7 +1729,8 @@ def roll_window(ndarray[float64_t, ndim=1, cast=True] values, # Exponentially weighted moving average -def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): +def ewma(float64_t[:] vals, float64_t com, + int adjust, int ignore_na, int minp): """ Compute exponentially-weighted moving average using center-of-mass. @@ -1722,8 +1749,8 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): cdef: Py_ssize_t N = len(vals) - ndarray[double_t] output = np.empty(N, dtype=float) - double alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur Py_ssize_t i, nobs if N == 0: @@ -1767,12 +1794,13 @@ def ewma(double_t[:] vals, double_t com, int adjust, int ignore_na, int minp): return output + # ---------------------------------------------------------------------- # Exponentially weighted moving covariance -def ewmcov(double_t[:] input_x, double_t[:] input_y, - double_t com, int adjust, int ignore_na, int minp, int bias): +def ewmcov(float64_t[:] input_x, float64_t[:] input_y, + float64_t com, int adjust, int ignore_na, int minp, int bias): """ Compute exponentially-weighted moving variance using center-of-mass. @@ -1793,10 +1821,10 @@ def ewmcov(double_t[:] input_x, double_t[:] input_y, cdef: Py_ssize_t N = len(input_x) - double alpha, old_wt_factor, new_wt, mean_x, mean_y, cov - double sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y Py_ssize_t i, nobs - ndarray[double_t] output + ndarray[float64_t] output if len(input_y) != N: raise ValueError("arrays are of different lengths " diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 852c4fb910560..32e687c4a958a 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from numpy import nan import pytest from pandas._libs.sparse import IntIndex @@ -24,7 +23,8 @@ def kind(request): class TestSparseArray(object): def setup_method(self, method): - self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6]) + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3, + np.nan, 4, 5, np.nan, 6]) self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 65459735e639b..652370d5529c0 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan import numpy as np from pandas.compat import range @@ -328,7 +327,7 @@ def test_combineFrame(self): frame_copy = self.frame.reindex(self.frame.index[::2]) del frame_copy['D'] - frame_copy['C'][:5] = nan + frame_copy['C'][:5] = np.nan added = self.frame + frame_copy diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 668613c494a47..01dee47fffe49 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -7,7 +7,6 @@ import sys import textwrap -from numpy import nan import numpy as np import pytest @@ -49,8 +48,8 @@ def test_repr_mixed_big(self): biggie = DataFrame({'A': np.random.randn(200), 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20, 'A'] = nan - biggie.loc[:20, 'B'] = nan + biggie.loc[:20, 'A'] = np.nan + biggie.loc[:20, 'B'] = np.nan foo = repr(biggie) # noqa diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index eecbdc0130f02..1c354c25c4439 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -6,7 +6,6 @@ import pytest -from numpy import nan from numpy.random import randn import numpy as np @@ -517,8 +516,8 @@ def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) - mat[:5] = nan - mat[-5:] = nan + mat[:5] = np.nan + mat[-5:] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() @@ -534,7 +533,7 @@ def test_first_last_valid(self, data, idx, assert empty.first_valid_index() is None # GH17400: no valid entries - frame[:] = nan + frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index aa91b7510a2b5..cf6a556ca58e9 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -6,7 +6,6 @@ import csv import pytest -from numpy import nan import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) @@ -52,7 +51,7 @@ def test_from_csv_deprecation(self): def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: - self.frame['A'][:5] = nan + self.frame['A'][:5] = np.nan self.frame.to_csv(path) self.frame.to_csv(path, columns=['A', 'B']) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index d8a545b323674..578fc4ab42d6a 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -9,7 +9,6 @@ import pytest import numpy as np -from numpy import nan import pandas as pd from pandas import (bdate_range, DataFrame, Index, Series, Timestamp, @@ -36,11 +35,11 @@ 'max', ]) def test_cythonized_aggers(op_name): - data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., nan, nan], + data = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan], 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df.loc[2:10:2, 'C'] = nan + df.loc[2:10:2, 'C'] = np.nan op = lambda x: getattr(x, op_name)() diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 35bd99ff2eda8..df7966d8323e3 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -5,7 +5,6 @@ import operator import numpy as np -from numpy import nan import pytest import pandas.compat as compat @@ -750,12 +749,12 @@ def _check_fill(meth, op, a, b, fill_value=0): with np.errstate(all='ignore'): if amask[i]: if bmask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(fill_value, b[i])) elif bmask[i]: if amask[i]: - exp_values.append(nan) + exp_values.append(np.nan) continue exp_values.append(op(a[i], fill_value)) else: @@ -765,8 +764,8 @@ def _check_fill(meth, op, a, b, fill_value=0): expected = Series(exp_values, exp_index) assert_series_equal(result, expected) - a = Series([nan, 1., 2., 3., nan], index=np.arange(5)) - b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6)) + a = Series([np.nan, 1., 2., 3., np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.], index=np.arange(6)) result = op(a, b) exp = equiv_op(a, b)