From f9a95e7bc6f0c334ad1626f52122be9eb943f6b6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 11:50:40 -0700 Subject: [PATCH 01/16] remove unused is_period --- pandas/core/dtypes/common.py | 28 ---------------------------- pandas/tests/dtypes/test_common.py | 6 ------ pandas/tests/dtypes/test_dtypes.py | 7 +------ 3 files changed, 1 insertion(+), 40 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 355bf58540219..1e5aecb074a51 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -304,34 +304,6 @@ def is_offsetlike(arr_or_obj): return False -def is_period(arr): - """ - Check whether an array-like is a periodical index. - - Parameters - ---------- - arr : array-like - The array-like to check. - - Returns - ------- - boolean : Whether or not the array-like is a periodical index. - - Examples - -------- - >>> is_period([1, 2, 3]) - False - >>> is_period(pd.Index([1, 2, 3])) - False - >>> is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) - True - """ - - # TODO: do we need this function? - # It seems like a repeat of is_period_arraylike. - return isinstance(arr, ABCPeriodIndex) or is_period_arraylike(arr) - - def is_datetime64_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the datetime64 dtype. diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a7a9faa9e77eb..54c6d33c4f0a0 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -171,12 +171,6 @@ def test_is_datetimetz(): assert com.is_datetimetz(s) -def test_is_period(): - assert not com.is_period([1, 2, 3]) - assert not com.is_period(pd.Index([1, 2, 3])) - assert com.is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) - - def test_is_datetime64_dtype(): assert not com.is_datetime64_dtype(object) assert not com.is_datetime64_dtype([1, 2, 3]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 02ac7fc7d5ed7..fb2aca7fc66e1 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, - is_period_dtype, is_period, + is_period_dtype, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, @@ -363,20 +363,15 @@ def test_basic(self): assert is_period_dtype(pidx.dtype) assert is_period_dtype(pidx) - assert is_period(pidx) s = Series(pidx, name='A') # dtypes # series results in object dtype currently, - # is_period checks period_arraylike assert not is_period_dtype(s.dtype) assert not is_period_dtype(s) - assert is_period(s) assert not is_period_dtype(np.dtype('float64')) assert not is_period_dtype(1.0) - assert not is_period(np.dtype('float64')) - assert not is_period(1.0) def test_empty(self): dt = PeriodDtype() From 4e05a07075e0441f362d2a242f31ed048d3b20dc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 12:05:33 -0700 Subject: [PATCH 02/16] move unused utils to _libs.util, docstrings --- pandas/_libs/src/numpy_helper.h | 6 +- pandas/_libs/tslibs/util.pxd | 197 +++++++++++++++++--------------- pandas/_libs/util.pxd | 78 +++++++++++++ 3 files changed, 183 insertions(+), 98 deletions(-) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 98eca92fd1ab2..9a32cb9b29eb7 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -16,8 +16,6 @@ The full license is in the LICENSE file, distributed with this software. #include "numpy/arrayscalars.h" -PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; } - PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i, PyObject* v) { npy_intp i = (npy_intp)_i; @@ -49,7 +47,9 @@ PANDAS_INLINE PyObject* char_to_string(const char* data) { } void set_array_not_contiguous(PyArrayObject* ao) { - ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + // Numpy>=1.8-compliant equivalent to: + // ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + PyArray_CLEARFLAGS(ao, (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)); } #endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index efdb1570ed878..195e8cd57edc5 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,4 +1,3 @@ -from numpy cimport ndarray cimport numpy as cnp cnp.import_array() @@ -29,6 +28,30 @@ cdef extern from "numpy/ndarrayobject.h": bint PyArray_IsIntegerScalar(obj) nogil bint PyArray_Check(obj) nogil + +cdef extern from "../src/numpy_helper.h": + object char_to_string(char*) + + +cdef extern from "../src/headers/stdint.h": + enum: UINT8_MAX + enum: UINT16_MAX + enum: UINT32_MAX + enum: UINT64_MAX + enum: INT8_MIN + enum: INT8_MAX + enum: INT16_MIN + enum: INT16_MAX + enum: INT32_MAX + enum: INT32_MIN + enum: INT64_MAX + enum: INT64_MIN + + +cdef inline int64_t get_nat(): + return INT64_MIN + + # -------------------------------------------------------------------- # Type Checking @@ -41,130 +64,110 @@ cdef inline bint is_integer_object(object obj) nogil: cdef inline bint is_float_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (float, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_float : bool + """ return (PyFloat_Check(obj) or (PyObject_TypeCheck(obj, &PyFloatingArrType_Type))) cdef inline bint is_complex_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, (complex, np.complex_))` + + Parameters + ---------- + val : object + + Returns + ------- + is_complex : bool + """ return (PyComplex_Check(obj) or PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type)) cdef inline bint is_bool_object(object obj) nogil: - return (PyBool_Check(obj) or - PyObject_TypeCheck(obj, &PyBoolArrType_Type)) - - -cdef inline bint is_timedelta64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - - -cdef inline bint is_datetime64_object(object obj) nogil: - return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - -# -------------------------------------------------------------------- - -cdef extern from "../src/numpy_helper.h": - void set_array_not_contiguous(ndarray ao) - - int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - cnp.int64_t get_nat() - object get_value_1d(ndarray, Py_ssize_t) - const char *get_c_string(object) except NULL - object char_to_string(char*) - -ctypedef fused numeric: - cnp.int8_t - cnp.int16_t - cnp.int32_t - cnp.int64_t - - cnp.uint8_t - cnp.uint16_t - cnp.uint32_t - cnp.uint64_t + """ + Cython equivalent of `isinstance(val, (bool, np.bool_))` - cnp.float32_t - cnp.float64_t + Parameters + ---------- + val : object -cdef extern from "../src/headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN + Returns + ------- + is_bool : bool + """ + return (PyBool_Check(obj) or + PyObject_TypeCheck(obj, &PyBoolArrType_Type)) -cdef inline object get_value_at(ndarray arr, object loc): - cdef: - Py_ssize_t i, sz - int casted +cdef inline bint is_timedelta64_object(object val) nogil: + """ + Cython equivalent of `isinstance(val, np.timedelta64)` - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) + Parameters + ---------- + val : object - if i < 0 and sz > 0: - i += sz - elif i >= sz or sz == 0: - raise IndexError('index out of bounds') + Returns + ------- + is_timedelta64 : bool + """ + return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) - return get_value_1d(arr, i) +cdef inline bint is_datetime64_object(object val) nogil: + """ + Cython equivalent of `isinstance(val, np.datetime64)` -cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): - """Sets a value into the array without checking the writeable flag. + Parameters + ---------- + val : object - This should be used when setting values in a loop, check the writeable - flag above the loop and then eschew the check on each iteration. + Returns + ------- + is_datetime64 : bool """ - cdef: - Py_ssize_t i, sz - if is_float_object(loc): - casted = int(loc) - if casted == loc: - loc = casted - i = loc - sz = cnp.PyArray_SIZE(arr) - - if i < 0: - i += sz - elif i >= sz: - raise IndexError('index out of bounds') + return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) - assign_value_1d(arr, i, value) -cdef inline set_value_at(ndarray arr, object loc, object value): - """Sets a value into the array after checking that the array is mutable. +cdef inline bint is_array(object val): """ - if not cnp.PyArray_ISWRITEABLE(arr): - raise ValueError('assignment destination is read-only') - - set_value_at_unsafe(arr, loc, value) + Cython equivalent of `isinstance(val, np.ndarray)` + Parameters + ---------- + val : object -cdef inline is_array(object o): - return cnp.PyArray_Check(o) + Returns + ------- + is_ndarray : bool + """ + return PyArray_Check(val) -cdef inline bint _checknull(object val): - try: - return val is None or (cpython.PyFloat_Check(val) and val != val) - except ValueError: - return False +cdef inline bint is_period_object(object val): + """ + Cython equivalent of `isinstance(val, pd.Period)` + Parameters + ---------- + val : object -cdef inline bint is_period_object(object val): + Returns + ------- + is_period : bool + """ return getattr(val, '_typ', '_typ') == 'period' @@ -181,3 +184,7 @@ cdef inline bint is_offset_object(object val): is_date_offset : bool """ return getattr(val, '_typ', None) == "dateoffset" + + +cdef inline bint _checknull(object val): + return val is None or (PyFloat_Check(val) and val != val) diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 0b7e66902cbb1..54339aab55745 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -1 +1,79 @@ from tslibs.util cimport * + +from cython cimport Py_ssize_t + +from numpy cimport ndarray + + +cdef extern from "src/numpy_helper.h": + void set_array_not_contiguous(ndarray ao) + + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + object get_value_1d(ndarray, Py_ssize_t) + const char *get_c_string(object) except NULL + + +ctypedef fused numeric: + cnp.int8_t + cnp.int16_t + cnp.int32_t + cnp.int64_t + + cnp.uint8_t + cnp.uint16_t + cnp.uint32_t + cnp.uint64_t + + cnp.float32_t + cnp.float64_t + + +cdef inline object get_value_at(ndarray arr, object loc): + cdef: + Py_ssize_t i, sz + int casted + + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0 and sz > 0: + i += sz + elif i >= sz or sz == 0: + raise IndexError('index out of bounds') + + return get_value_1d(arr, i) + + +cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): + """Sets a value into the array without checking the writeable flag. + + This should be used when setting values in a loop, check the writeable + flag above the loop and then eschew the check on each iteration. + """ + cdef: + Py_ssize_t i, sz + if is_float_object(loc): + casted = int(loc) + if casted == loc: + loc = casted + i = loc + sz = cnp.PyArray_SIZE(arr) + + if i < 0: + i += sz + elif i >= sz: + raise IndexError('index out of bounds') + + assign_value_1d(arr, i, value) + +cdef inline set_value_at(ndarray arr, object loc, object value): + """Sets a value into the array after checking that the array is mutable. + """ + if not cnp.PyArray_ISWRITEABLE(arr): + raise ValueError('assignment destination is read-only') + + set_value_at_unsafe(arr, loc, value) From d1cfac816af66f176bf4095e1994d65b1eb18411 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 12:08:54 -0700 Subject: [PATCH 03/16] remove usages of deprecated API --- pandas/_libs/algos.pyx | 2 +- pandas/_libs/groupby.pyx | 4 +++- pandas/_libs/tslibs/conversion.pyx | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ecfc7355dddfc..124792638e3df 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -129,7 +129,7 @@ def is_lexsorted(list list_of_arrays): for i in range(nlevels): arr = list_of_arrays[i] assert arr.dtype.name == 'int64' - vecs[i] = arr.data + vecs[i] = cnp.PyArray_DATA(arr) # Assume uniqueness?? with nogil: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 5e4a431caca00..5681d01c6bb25 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -7,10 +7,12 @@ from cython cimport Py_ssize_t from libc.stdlib cimport malloc, free import numpy as np +cimport numpy as cnp from numpy cimport (ndarray, double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() from util cimport numeric, get_nat @@ -118,7 +120,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, counts[:] = _counts[1:] data = np.empty((K, N), dtype=np.float64) - ptr = data.data + ptr = cnp.PyArray_DATA(data) take_2d_axis1_float64_float64(values.T, indexer, out=data) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7621ac912d4d5..4335e7baeafe9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -888,7 +888,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = trans.data + tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) result_a = np.empty(n, dtype=np.int64) From cf84856b1c5754ace04521c3937f4e36d6b8724a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 12:10:00 -0700 Subject: [PATCH 04/16] docstring --- pandas/_libs/tslibs/util.pxd | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 195e8cd57edc5..692f0cb962b59 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -56,6 +56,17 @@ cdef inline int64_t get_nat(): # Type Checking cdef inline bint is_string_object(object obj) nogil: + """ + Cython equivalent of `isinstance(val, compat.string_types)` + + Parameters + ---------- + val : object + + Returns + ------- + is_string : bool + """ return PyString_Check(obj) or PyUnicode_Check(obj) From abda1924c0cd9059fb3db241a1bfea970aeddf89 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 13:09:03 -0700 Subject: [PATCH 05/16] whitespace, docstring cleanup --- pandas/_libs/tslibs/util.pxd | 5 +++-- pandas/_libs/util.pxd | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 692f0cb962b59..8d07951bf28f9 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -18,6 +18,7 @@ cdef extern from "Python.h": cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type + ctypedef signed long long int64_t cdef extern from "numpy/ndarrayobject.h": PyTypeObject PyTimedeltaArrType_Type @@ -122,7 +123,7 @@ cdef inline bint is_bool_object(object obj) nogil: PyObject_TypeCheck(obj, &PyBoolArrType_Type)) -cdef inline bint is_timedelta64_object(object val) nogil: +cdef inline bint is_timedelta64_object(object obj) nogil: """ Cython equivalent of `isinstance(val, np.timedelta64)` @@ -137,7 +138,7 @@ cdef inline bint is_timedelta64_object(object val) nogil: return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) -cdef inline bint is_datetime64_object(object val) nogil: +cdef inline bint is_datetime64_object(object obj) nogil: """ Cython equivalent of `isinstance(val, np.datetime64)` diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index 54339aab55745..daaa17f6a8b13 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -70,6 +70,7 @@ cdef inline set_value_at_unsafe(ndarray arr, object loc, object value): assign_value_1d(arr, i, value) + cdef inline set_value_at(ndarray arr, object loc, object value): """Sets a value into the array after checking that the array is mutable. """ From 6d8c335744b6853c8649247fc78d266e55030917 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 13:15:13 -0700 Subject: [PATCH 06/16] use util.is_array --- pandas/_libs/index.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 31ef4b7a3e807..5918560cf1436 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -37,7 +37,7 @@ cdef inline bint is_definitely_invalid_key(object val): return True # we have a _data, means we are a NDFrame - return (PySlice_Check(val) or cnp.PyArray_Check(val) + return (PySlice_Check(val) or util.is_array(val) or PyList_Check(val) or hasattr(val, '_data')) @@ -104,7 +104,7 @@ cdef class IndexEngine: void* data_ptr loc = self.get_loc(key) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or util.is_array(loc): return arr[loc] else: return get_value_at(arr, loc, tz=tz) @@ -120,7 +120,7 @@ cdef class IndexEngine: loc = self.get_loc(key) value = convert_scalar(arr, value) - if PySlice_Check(loc) or cnp.PyArray_Check(loc): + if PySlice_Check(loc) or util.is_array(loc): arr[loc] = value else: util.set_value_at(arr, loc, value) From 9360a85e6d8d0240e17bbc2a3f6693ca6bca2e1e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 13:42:16 -0700 Subject: [PATCH 07/16] update import --- pandas/core/dtypes/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 738e1ea9062f6..95bc06e16cab6 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -22,7 +22,6 @@ is_datetime64_ns_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, - is_period, is_period_dtype, # string-like From ca056364cc7b5656c10738747b68a3e80a21bede Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 15:20:41 -0700 Subject: [PATCH 08/16] update test --- pandas/tests/api/test_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bd4891326c751..4bd8f85e4a21d 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -24,7 +24,7 @@ class TestTypes(Base): 'is_object_dtype', 'is_scalar', 'is_sparse', 'is_string_dtype', 'is_signed_integer_dtype', 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', - 'is_unsigned_integer_dtype', 'is_period', + 'is_unsigned_integer_dtype', 'is_period_dtype', 'is_interval', 'is_interval_dtype', 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_file_like', From 1f326e21f138a0d7a2292a6c306adf4f696c8d2a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 16:41:46 -0700 Subject: [PATCH 09/16] remove unused import --- pandas/_libs/tslibs/util.pxd | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 8d07951bf28f9..931d7ccebcd28 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,7 +1,6 @@ cimport numpy as cnp cnp.import_array() -cimport cpython from cpython cimport PyTypeObject From 82228e1e4e5b7900a22421a989c531cb29f3c8d2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 26 Jul 2018 17:04:30 -0700 Subject: [PATCH 10/16] avoid need for cnp in util --- pandas/_libs/tslibs/util.pxd | 7 +++++-- pandas/_libs/util.pxd | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 931d7ccebcd28..b2febd8ca4dde 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,5 +1,3 @@ -cimport numpy as cnp -cnp.import_array() from cpython cimport PyTypeObject @@ -18,6 +16,7 @@ cdef extern from "Python.h": cdef extern from "numpy/arrayobject.h": PyTypeObject PyFloatingArrType_Type ctypedef signed long long int64_t + int _import_array() except -1 cdef extern from "numpy/ndarrayobject.h": PyTypeObject PyTimedeltaArrType_Type @@ -52,6 +51,10 @@ cdef inline int64_t get_nat(): return INT64_MIN +cdef inline int import_array() except -1: + _import_array() + + # -------------------------------------------------------------------- # Type Checking diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd index daaa17f6a8b13..134f34330d8aa 100644 --- a/pandas/_libs/util.pxd +++ b/pandas/_libs/util.pxd @@ -2,6 +2,7 @@ from tslibs.util cimport * from cython cimport Py_ssize_t +cimport numpy as cnp from numpy cimport ndarray From d318cc8eae14bce3e13b4af1e7ac58845167d4af Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 27 Jul 2018 12:09:43 -0700 Subject: [PATCH 11/16] troubleshoot appveyor failure --- pandas/_libs/tslibs/util.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index b2febd8ca4dde..df893c20d4baa 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -44,7 +44,7 @@ cdef extern from "../src/headers/stdint.h": enum: INT32_MAX enum: INT32_MIN enum: INT64_MAX - enum: INT64_MIN + int64_t INT64_MIN cdef inline int64_t get_nat(): From d25070d8e1a5148772f925f10dac2b43a344c099 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 27 Jul 2018 13:20:37 -0700 Subject: [PATCH 12/16] try getting NPY_MIN_INT64 directly --- pandas/_libs/tslibs/util.pxd | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index df893c20d4baa..f781f26b52512 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -27,6 +27,9 @@ cdef extern from "numpy/ndarrayobject.h": bint PyArray_IsIntegerScalar(obj) nogil bint PyArray_Check(obj) nogil +cdef extern from "numpy/npy_common.h": + int64_t NPY_MIN_INT64 + cdef extern from "../src/numpy_helper.h": object char_to_string(char*) @@ -44,11 +47,11 @@ cdef extern from "../src/headers/stdint.h": enum: INT32_MAX enum: INT32_MIN enum: INT64_MAX - int64_t INT64_MIN + enum: INT64_MIN cdef inline int64_t get_nat(): - return INT64_MIN + return NPY_MIN_INT64 cdef inline int import_array() except -1: From bed3eed0864b9b9c124573afbdacc38aafb9b92d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 27 Jul 2018 13:42:04 -0700 Subject: [PATCH 13/16] disentangle util from src, docstring --- pandas/_libs/src/numpy_helper.h | 7 ------- pandas/_libs/tslibs/util.pxd | 33 +++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 9a32cb9b29eb7..f2368383f18c9 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -38,13 +38,6 @@ PANDAS_INLINE const char* get_c_string(PyObject* obj) { #endif } -PANDAS_INLINE PyObject* char_to_string(const char* data) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_FromString(data); -#else - return PyString_FromString(data); -#endif -} void set_array_not_contiguous(PyArrayObject* ao) { // Numpy>=1.8-compliant equivalent to: diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index f781f26b52512..624ed7ced2654 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,6 +1,18 @@ from cpython cimport PyTypeObject +cdef extern from *: + """ + PyObject* char_to_string(const char* data) { + #if PY_VERSION_HEX >= 0x03000000 + return PyUnicode_FromString(data); + #else + return PyString_FromString(data); + #endif + } + """ + object char_to_string(const char* data) + cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil @@ -31,10 +43,6 @@ cdef extern from "numpy/npy_common.h": int64_t NPY_MIN_INT64 -cdef extern from "../src/numpy_helper.h": - object char_to_string(char*) - - cdef extern from "../src/headers/stdint.h": enum: UINT8_MAX enum: UINT16_MAX @@ -77,6 +85,23 @@ cdef inline bint is_string_object(object obj) nogil: cdef inline bint is_integer_object(object obj) nogil: + """ + Cython equivalent of + + `isinstance(val, (int, long, np.integer)) and not isinstance(val, bool)` + + Parameters + ---------- + val : object + + Returns + ------- + is_integer : bool + + Notes + ----- + This counts np.timedelta64 objects as integers. + """ return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) From 6320a2c06e6c2b55d3ef89274e42f5468e41d323 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 27 Jul 2018 17:10:03 -0700 Subject: [PATCH 14/16] dummy commit to allow push --- pandas/_libs/src/numpy_helper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index f2368383f18c9..753cba6ce62aa 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -38,7 +38,6 @@ PANDAS_INLINE const char* get_c_string(PyObject* obj) { #endif } - void set_array_not_contiguous(PyArrayObject* ao) { // Numpy>=1.8-compliant equivalent to: // ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); From d9522720f7e4dd7b2a148402ff2cbafc5633d34e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 29 Jul 2018 10:21:25 -0700 Subject: [PATCH 15/16] revert removal of is_period --- pandas/core/dtypes/common.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 1e5aecb074a51..905073645fcb3 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -21,9 +21,9 @@ is_named_tuple, is_array_like, is_decimal, is_complex, is_interval) -_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name - for t in ['O', 'int8', 'uint8', 'int16', 'uint16', - 'int32', 'uint32', 'int64', 'uint64']]) +_POSSIBLY_CAST_DTYPES = {np.dtype(t).name + for t in ['O', 'int8', 'uint8', 'int16', 'uint16', + 'int32', 'uint32', 'int64', 'uint64']} _NS_DTYPE = conversion.NS_DTYPE _TD_DTYPE = conversion.TD_DTYPE @@ -304,6 +304,34 @@ def is_offsetlike(arr_or_obj): return False +def is_period(arr): + """ + Check whether an array-like is a periodical index. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean : Whether or not the array-like is a periodical index. + + Examples + -------- + >>> is_period([1, 2, 3]) + False + >>> is_period(pd.Index([1, 2, 3])) + False + >>> is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + True + """ + + # TODO: do we need this function? + # It seems like a repeat of is_period_arraylike. + return isinstance(arr, ABCPeriodIndex) or is_period_arraylike(arr) + + def is_datetime64_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the datetime64 dtype. From e6fa730d033ede36d6101a946fc34b01dfcf55a9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 29 Jul 2018 17:48:38 -0700 Subject: [PATCH 16/16] revert is_period changes --- pandas/core/dtypes/api.py | 1 + pandas/tests/api/test_types.py | 2 +- pandas/tests/dtypes/test_common.py | 6 ++++++ pandas/tests/dtypes/test_dtypes.py | 7 ++++++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 95bc06e16cab6..738e1ea9062f6 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -22,6 +22,7 @@ is_datetime64_ns_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, + is_period, is_period_dtype, # string-like diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 4bd8f85e4a21d..bd4891326c751 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -24,7 +24,7 @@ class TestTypes(Base): 'is_object_dtype', 'is_scalar', 'is_sparse', 'is_string_dtype', 'is_signed_integer_dtype', 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', - 'is_unsigned_integer_dtype', + 'is_unsigned_integer_dtype', 'is_period', 'is_period_dtype', 'is_interval', 'is_interval_dtype', 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_file_like', diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 54c6d33c4f0a0..a7a9faa9e77eb 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -171,6 +171,12 @@ def test_is_datetimetz(): assert com.is_datetimetz(s) +def test_is_period(): + assert not com.is_period([1, 2, 3]) + assert not com.is_period(pd.Index([1, 2, 3])) + assert com.is_period(pd.PeriodIndex(["2017-01-01"], freq="D")) + + def test_is_datetime64_dtype(): assert not com.is_datetime64_dtype(object) assert not com.is_datetime64_dtype([1, 2, 3]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index fb2aca7fc66e1..02ac7fc7d5ed7 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, - is_period_dtype, + is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, @@ -363,15 +363,20 @@ def test_basic(self): assert is_period_dtype(pidx.dtype) assert is_period_dtype(pidx) + assert is_period(pidx) s = Series(pidx, name='A') # dtypes # series results in object dtype currently, + # is_period checks period_arraylike assert not is_period_dtype(s.dtype) assert not is_period_dtype(s) + assert is_period(s) assert not is_period_dtype(np.dtype('float64')) assert not is_period_dtype(1.0) + assert not is_period(np.dtype('float64')) + assert not is_period(1.0) def test_empty(self): dt = PeriodDtype()