PERF: tighter cython declarations, faster __iter__ (#43872)

pandas-dev · Oct 5, 2021 · 6599834
1 parent 4e8b77d
commit 6599834
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 36 deletions.
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
@@ -8,18 +8,16 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 # ensure_dtype
 # ----------------------------------------------------------------------
 
-cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num
-
 
 def ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
     if util.is_array(arr):
-        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
+        if (<ndarray>arr).descr.type_num == cnp.NPY_INTP:
             return arr
         else:
             # equiv: arr.astype(np.intp)
-            return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
+            return cnp.PyArray_Cast(<ndarray>arr, cnp.NPY_INTP)
     else:
         return np.array(arr, dtype=np.intp)
 

diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
@@ -103,7 +103,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
     cdef:
@@ -158,7 +158,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
 
@@ -195,8 +195,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
                                     fill_value=np.nan):
     cdef:
         Py_ssize_t i, j, k, n, idx
-        ndarray[intp_t] idx0 = indexer[0]
-        ndarray[intp_t] idx1 = indexer[1]
+        ndarray[intp_t, ndim=1] idx0 = indexer[0]
+        ndarray[intp_t, ndim=1] idx1 = indexer[1]
         {{c_type_out}} fv
 
     n = len(idx0)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -227,7 +227,7 @@ cdef class BlockPlacement:
         cdef:
             slice nv, s = self._ensure_has_slice()
             Py_ssize_t other_int, start, stop, step, l
-            ndarray newarr
+            ndarray[intp_t, ndim=1] newarr
 
         if s is not None:
             # see if we are either all-above or all-below, each of which
@@ -260,7 +260,7 @@ cdef class BlockPlacement:
         cdef:
             slice slc = self._ensure_has_slice()
             slice new_slice
-            ndarray new_placement
+            ndarray[intp_t, ndim=1] new_placement
 
         if slc is not None and slc.step == 1:
             new_slc = slice(slc.start * factor, slc.stop * factor, 1)
@@ -345,7 +345,9 @@ cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -
     return length
 
 
-cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
+cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex(
+    slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX
+):
     """
     Get (start, stop, step, length) tuple for a slice.
 
@@ -460,9 +462,11 @@ def get_blkno_indexers(
     # blockno handling.
     cdef:
         int64_t cur_blkno
-        Py_ssize_t i, start, stop, n, diff, tot_len
+        Py_ssize_t i, start, stop, n, diff
+        cnp.npy_intp tot_len
         int64_t blkno
         object group_dict = defaultdict(list)
+        ndarray[int64_t, ndim=1] arr
 
     n = blknos.shape[0]
     result = list()
@@ -495,7 +499,8 @@ def get_blkno_indexers(
                 result.append((blkno, slice(slices[0][0], slices[0][1])))
             else:
                 tot_len = sum(stop - start for start, stop in slices)
-                arr = np.empty(tot_len, dtype=np.int64)
+                # equiv np.empty(tot_len, dtype=np.int64)
+                arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0)
 
                 i = 0
                 for start, stop in slices:
@@ -526,16 +531,21 @@ def get_blkno_placements(blknos, group: bool = True):
         yield blkno, BlockPlacement(indexer)
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
 cpdef update_blklocs_and_blknos(
-    ndarray[intp_t] blklocs, ndarray[intp_t] blknos, Py_ssize_t loc, intp_t nblocks
+    ndarray[intp_t, ndim=1] blklocs,
+    ndarray[intp_t, ndim=1] blknos,
+    Py_ssize_t loc,
+    intp_t nblocks,
 ):
     """
     Update blklocs and blknos when a new column is inserted at 'loc'.
     """
     cdef:
         Py_ssize_t i
         cnp.npy_intp length = len(blklocs) + 1
-        ndarray[intp_t] new_blklocs, new_blknos
+        ndarray[intp_t, ndim=1] new_blklocs, new_blknos
 
     # equiv: new_blklocs = np.empty(length, dtype=np.intp)
     new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)
@@ -693,7 +703,7 @@ cdef class BlockManager:
             cnp.npy_intp length = self.shape[0]
             SharedBlock blk
             BlockPlacement bp
-            ndarray[intp_t] new_blknos, new_blklocs
+            ndarray[intp_t, ndim=1] new_blknos, new_blklocs
 
         # equiv: np.empty(length, dtype=np.intp)
         new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)
@@ -711,7 +721,11 @@ cdef class BlockManager:
                 new_blknos[j] = blkno
                 new_blklocs[j] = i
 
-        for blkno in new_blknos:
+        for i in range(length):
+            # faster than `for blkno in new_blknos`
+            #  https://github.com/cython/cython/issues/4393
+            blkno = new_blknos[i]
+
             # If there are any -1s remaining, this indicates that our mgr_locs
             #  are invalid.
             if blkno == -1:

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -448,7 +448,7 @@ def fast_zip(list ndarrays) -> ndarray[object]:
     """
     cdef:
         Py_ssize_t i, j, k, n
-        ndarray[object] result
+        ndarray[object, ndim=1] result
         flatiter it
         object val, tup
 
@@ -507,7 +507,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray:
     """
     cdef:
         Py_ssize_t i, n = len(indexer)
-        ndarray[intp_t] rev_indexer
+        ndarray[intp_t, ndim=1] rev_indexer
         intp_t idx
 
     rev_indexer = np.empty(length, dtype=np.intp)
@@ -540,7 +540,7 @@ def has_infs(floating[:] arr) -> bool:
     return ret
 
 
-def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len):
+def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
     cdef:
         Py_ssize_t i, n = len(indices)
         int k, vstart, vlast, v
@@ -579,7 +579,7 @@ def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def maybe_booleans_to_slice(ndarray[uint8_t] mask):
+def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask):
     cdef:
         Py_ssize_t i, n = len(mask)
         Py_ssize_t start = 0, end = 0
@@ -775,14 +775,14 @@ def is_all_arraylike(obj: list) -> bool:
 # is a general, O(max(len(values), len(binner))) method.
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner,
+def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner,
                        object closed='left', bint hasnans=False):
     """
     Int64 (datetime64) version of generic python version in ``groupby.py``.
     """
     cdef:
         Py_ssize_t lenidx, lenbin, i, j, bc, vc
-        ndarray[int64_t] bins
+        ndarray[int64_t, ndim=1] bins
         int64_t l_bin, r_bin, nat_count
         bint right_closed = closed == 'right'
 
@@ -931,7 +931,7 @@ def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups):
     return np.asarray(starts), np.asarray(ends)
 
 
-def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys,
+def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys,
                  list sorted_labels) -> dict:
     """
     Parameters
@@ -2067,7 +2067,9 @@ cdef bint is_period_array(ndarray[object] values):
     if len(values) == 0:
         return False
 
-    for val in values:
+    for i in range(n):
+        val = values[i]
+
         if is_period_object(val):
             if dtype_code == -10000:
                 dtype_code = val._dtype._dtype_code
@@ -2102,7 +2104,9 @@ cpdef bint is_interval_array(ndarray values):
     if len(values) == 0:
         return False
 
-    for val in values:
+    for i in range(n):
+        val = values[i]
+
         if is_interval(val):
             if closed is None:
                 closed = val.closed
@@ -2144,7 +2148,7 @@ cpdef bint is_interval_array(ndarray values):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def maybe_convert_numeric(
-    ndarray[object] values,
+    ndarray[object, ndim=1] values,
     set na_values,
     bint convert_empty=True,
     bint coerce_numeric=False,
@@ -2205,12 +2209,12 @@ def maybe_convert_numeric(
         int status, maybe_int
         Py_ssize_t i, n = values.size
         Seen seen = Seen(coerce_numeric)
-        ndarray[float64_t] floats = np.empty(n, dtype='f8')
-        ndarray[complex128_t] complexes = np.empty(n, dtype='c16')
-        ndarray[int64_t] ints = np.empty(n, dtype='i8')
-        ndarray[uint64_t] uints = np.empty(n, dtype='u8')
-        ndarray[uint8_t] bools = np.empty(n, dtype='u1')
-        ndarray[uint8_t] mask = np.zeros(n, dtype="u1")
+        ndarray[float64_t, ndim=1] floats = np.empty(n, dtype='f8')
+        ndarray[complex128_t, ndim=1] complexes = np.empty(n, dtype='c16')
+        ndarray[int64_t, ndim=1] ints = np.empty(n, dtype='i8')
+        ndarray[uint64_t, ndim=1] uints = np.empty(n, dtype='u8')
+        ndarray[uint8_t, ndim=1] bools = np.empty(n, dtype='u1')
+        ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1")
         float64_t fval
         bint allow_null_in_int = convert_to_masked_nullable
 

diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
@@ -7,10 +7,9 @@ from numpy cimport import_array
 
 import_array()
 
-from pandas._libs.lib import is_complex
-
 from pandas._libs.util cimport (
     is_array,
+    is_complex_object,
     is_real_number_object,
 )
 
@@ -196,7 +195,7 @@ cpdef assert_almost_equal(a, b,
                            f"with rtol={rtol}, atol={atol}")
         return True
 
-    if is_complex(a) and is_complex(b):
+    if is_complex_object(a) and is_complex_object(b):
         if array_equivalent(a, b, strict_nan=True):
             # inf comparison
             return True