Commit 6fe83bb

jbrockmendel authored and jreback committed
REF: cython cleanup, typing, optimizations (#23464)
* Easy bits of #23382
* Easy parts of #23368
1 parent ee7d856 commit 6fe83bb

13 files changed: +117 -116 lines

Diff for: pandas/_libs/algos.pyx (+3 -1)

@@ -77,6 +77,8 @@ class NegInfinity(object):
     __ge__ = lambda self, other: isinstance(other, NegInfinity)


+@cython.wraparound(False)
+@cython.boundscheck(False)
 cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
     """
     Efficiently find the unique first-differences of the given array.
@@ -793,7 +795,7 @@ arrmap_bool = arrmap["uint8_t"]

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic(ndarray[algos_t] arr, bint timelike):
+def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
     """
     Returns
     -------
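
Note: `@cython.wraparound(False)` and `@cython.boundscheck(False)` are Cython compiler directives; adding them to `unique_deltas` above turns off negative-index handling and per-access bounds checks inside the function, the usual micro-optimization for tight loops over typed arrays. A minimal sketch of the same pattern, with made-up names rather than the pandas function:

    import cython
    import numpy as np

    cimport numpy as cnp

    cnp.import_array()


    @cython.wraparound(False)
    @cython.boundscheck(False)
    def first_diffs(cnp.ndarray[cnp.int64_t, ndim=1] arr):
        # Illustrative sketch (not pandas code): plain first differences.
        # The directives above remove per-access bounds and negative-index
        # checks inside this loop.
        cdef Py_ssize_t i, n = arr.shape[0]
        cdef cnp.ndarray[cnp.int64_t, ndim=1] out = np.empty(max(n - 1, 0), dtype=np.int64)
        for i in range(n - 1):
            out[i] = arr[i + 1] - arr[i]
        return out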

Diff for: pandas/_libs/groupby.pyx (+1 -1)

@@ -353,7 +353,7 @@ def group_any_all(ndarray[uint8_t] out,
     The returned values will either be 0 or 1 (False or True, respectively).
     """
     cdef:
-        Py_ssize_t i, N=len(labels)
+        Py_ssize_t i, N = len(labels)
         int64_t lab
         uint8_t flag_val


Diff for: pandas/_libs/groupby_helper.pxi.in (-20)

@@ -667,11 +667,6 @@ def group_max(ndarray[groupby_t, ndim=2] out,
                     out[i, j] = maxx[i, j]


-group_max_float64 = group_max["float64_t"]
-group_max_float32 = group_max["float32_t"]
-group_max_int64 = group_max["int64_t"]
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_min(ndarray[groupby_t, ndim=2] out,
@@ -734,11 +729,6 @@ def group_min(ndarray[groupby_t, ndim=2] out,
                     out[i, j] = minx[i, j]


-group_min_float64 = group_min["float64_t"]
-group_min_float32 = group_min["float32_t"]
-group_min_int64 = group_min["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummin(ndarray[groupby_t, ndim=2] out,
@@ -787,11 +777,6 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
                     out[i, j] = mval


-group_cummin_float64 = group_cummin["float64_t"]
-group_cummin_float32 = group_cummin["float32_t"]
-group_cummin_int64 = group_cummin["int64_t"]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummax(ndarray[groupby_t, ndim=2] out,
@@ -837,8 +822,3 @@ def group_cummax(ndarray[groupby_t, ndim=2] out,
                     if val > mval:
                         accum[lab, j] = mval = val
                     out[i, j] = mval
-
-
-group_cummax_float64 = group_cummax["float64_t"]
-group_cummax_float32 = group_cummax["float32_t"]
-group_cummax_int64 = group_cummax["int64_t"]
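
For context on the deletions above: `group_max`, `group_min`, `group_cummin` and `group_cummax` take the fused type `groupby_t`, so Cython already generates one specialization per member type, and a specialization can be picked by indexing the function with the type name (the same pattern as `arrmap["uint8_t"]` in algos.pyx above). The removed module-level aliases were just that indexing spelled out. A small sketch of the mechanism, using made-up names:

    cimport cython
    cimport numpy as cnp
    from numpy cimport float32_t, float64_t, int64_t, ndarray

    cnp.import_array()

    ctypedef fused groupby_t:
        float64_t
        float32_t
        int64_t


    @cython.boundscheck(False)
    @cython.wraparound(False)
    def column_total(ndarray[groupby_t, ndim=1] arr):
        # One C specialization of this def is generated per member of groupby_t.
        cdef Py_ssize_t i
        cdef groupby_t total = 0
        for i in range(arr.shape[0]):
            total += arr[i]
        return total

    # Indexing the fused function by type name selects one specialization,
    # which is all the deleted aliases (e.g. group_max["float64_t"]) did:
    column_total_float64 = column_total["float64_t"]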

Diff for: pandas/_libs/hashtable_class_helper.pxi.in (+13 -13)

@@ -86,12 +86,12 @@ cdef class {{name}}Vector:
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
         self.ao = np.empty(self.data.m, dtype={{idtype}})
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     cdef resize(self):
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
         self.ao.resize(self.data.m, refcheck=False)
-        self.data.data = <{{arg}}*> self.ao.data
+        self.data.data = <{{arg}}*>self.ao.data

     def __dealloc__(self):
         if self.data is not NULL:
@@ -140,7 +140,7 @@ cdef class StringVector:
         self.external_view_exists = False
         self.data.n = 0
         self.data.m = _INIT_VEC_CAP
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()

@@ -153,7 +153,7 @@ cdef class StringVector:
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)

         orig_data = self.data.data
-        self.data.data = <char **> malloc(self.data.m * sizeof(char *))
+        self.data.data = <char **>malloc(self.data.m * sizeof(char *))
         if not self.data.data:
             raise MemoryError()
         for i in range(m):
@@ -208,22 +208,22 @@ cdef class ObjectVector:
         self.n = 0
         self.m = _INIT_VEC_CAP
         self.ao = np.empty(_INIT_VEC_CAP, dtype=object)
-        self.data = <PyObject**> self.ao.data
+        self.data = <PyObject**>self.ao.data

     def __len__(self):
         return self.n

-    cdef inline append(self, object o):
+    cdef inline append(self, object obj):
         if self.n == self.m:
             if self.external_view_exists:
                 raise ValueError("external reference but "
                                  "Vector.resize() needed")
             self.m = max(self.m * 2, _INIT_VEC_CAP)
             self.ao.resize(self.m, refcheck=False)
-            self.data = <PyObject**> self.ao.data
+            self.data = <PyObject**>self.ao.data

-        Py_INCREF(o)
-        self.data[self.n] = <PyObject*> o
+        Py_INCREF(obj)
+        self.data[self.n] = <PyObject*>obj
         self.n += 1

     def to_array(self):
@@ -768,7 +768,7 @@ cdef class StringHashTable(HashTable):
         use_na_value = na_value is not None

         # assign pointers and pre-filter out missing
-        vecs = <const char **> malloc(n * sizeof(char *))
+        vecs = <const char **>malloc(n * sizeof(char *))
         for i in range(n):
             val = values[i]

@@ -844,9 +844,9 @@ cdef class PyObjectHashTable(HashTable):

     def sizeof(self, deep=False):
         """ return the size of my table in bytes """
-        return self.table.n_buckets * (sizeof(PyObject *) + # keys
-                                       sizeof(Py_ssize_t) + # vals
-                                       sizeof(uint32_t)) # flags
+        return self.table.n_buckets * (sizeof(PyObject *) +  # keys
+                                       sizeof(Py_ssize_t) +  # vals
+                                       sizeof(uint32_t))  # flags

     cpdef get_item(self, object val):
         cdef khiter_t k
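
Most of the churn above is a style change, dropping the space after C casts (`<PyObject**> x` becomes `<PyObject**>x`), plus renaming the `append` argument from `o` to `obj`. The method itself shows the pattern these vectors rely on: the object ndarray's buffer is reinterpreted as a raw `PyObject**`, so every append has to take a reference explicitly before storing the pointer. A stripped-down sketch of that idea (an assumed simplification, not the pandas class):

    import numpy as np

    cimport numpy as cnp
    from cpython.ref cimport PyObject, Py_INCREF

    cnp.import_array()


    cdef class TinyObjectVector:
        # Assumed simplification of the ObjectVector idea, not the pandas class.
        cdef:
            Py_ssize_t n, m
            cnp.ndarray ao
            PyObject **data

        def __cinit__(self):
            self.n = 0
            self.m = 8
            self.ao = np.empty(self.m, dtype=object)
            self.data = <PyObject**>self.ao.data

        def __len__(self):
            return self.n

        cdef inline append(self, object obj):
            if self.n == self.m:
                # Grow the backing ndarray, then re-point at its (possibly moved) buffer.
                self.m = self.m * 2
                self.ao.resize(self.m, refcheck=False)
                self.data = <PyObject**>self.ao.data

            Py_INCREF(obj)                  # the raw slot now owns a reference
            self.data[self.n] = <PyObject*>obj
            self.n += 1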

Diff for: pandas/_libs/hashtable_func_helper.pxi.in (+11 -11)

@@ -45,11 +45,11 @@ cdef build_count_table_{{dtype}}({{dtype}}_t[:] values,
         val = values[i]

         if not checknull(val) or not dropna:
-            k = kh_get_{{ttype}}(table, <PyObject*> val)
+            k = kh_get_{{ttype}}(table, <PyObject*>val)
             if k != table.n_buckets:
                 table.vals[k] += 1
             else:
-                k = kh_put_{{ttype}}(table, <PyObject*> val, &ret)
+                k = kh_put_{{ttype}}(table, <PyObject*>val, &ret)
                 table.vals[k] = 1
     {{else}}
     with nogil:
@@ -103,7 +103,7 @@ cpdef value_count_{{dtype}}({{scalar}}[:] values, bint dropna):
     {{if dtype == 'object'}}
     for k in range(table.n_buckets):
         if kh_exist_{{ttype}}(table, k):
-            result_keys[i] = <{{dtype}}> table.keys[k]
+            result_keys[i] = <{{dtype}}>table.keys[k]
             result_counts[i] = table.vals[k]
             i += 1
     {{else}}
@@ -152,7 +152,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     if keep == 'last':
         {{if dtype == 'object'}}
         for i from n > i >= 0:
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
         {{else}}
         with nogil:
@@ -163,7 +163,7 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
     elif keep == 'first':
         {{if dtype == 'object'}}
         for i in range(n):
-            kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+            kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
             out[i] = ret == 0
         {{else}}
         with nogil:
@@ -175,13 +175,13 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
         {{if dtype == 'object'}}
         for i in range(n):
             value = values[i]
-            k = kh_get_{{ttype}}(table, <PyObject*> value)
+            k = kh_get_{{ttype}}(table, <PyObject*>value)
             if k != table.n_buckets:
                 out[table.vals[k]] = 1
                 out[i] = 1
             else:
-                k = kh_put_{{ttype}}(table, <PyObject*> value, &ret)
-                table.keys[k] = <PyObject*> value
+                k = kh_put_{{ttype}}(table, <PyObject*>value, &ret)
+                table.keys[k] = <PyObject*>value
                 table.vals[k] = i
                 out[i] = 0
         {{else}}
@@ -245,7 +245,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):

     {{if dtype == 'object'}}
     for i in range(n):
-        kh_put_{{ttype}}(table, <PyObject*> values[i], &ret)
+        kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
     {{else}}
     with nogil:
         for i in range(n):
@@ -259,7 +259,7 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
     {{if dtype == 'object'}}
     for i in range(n):
         val = arr[i]
-        k = kh_get_{{ttype}}(table, <PyObject*> val)
+        k = kh_get_{{ttype}}(table, <PyObject*>val)
         result[i] = (k != table.n_buckets)
     {{else}}
     with nogil:
@@ -342,7 +342,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna):
             else:
                 continue

-            modes[j] = <object> table.keys[k]
+            modes[j] = <object>table.keys[k]
     {{endif}}

     kh_destroy_{{table_type}}(table)
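
The khash calls touched above follow a fixed convention: `kh_get_*` returns `table.n_buckets` when the key is absent (hence the `k != table.n_buckets` checks), and `kh_put_*` inserts a key, setting `ret` to 0 when it was already present (hence `out[i] = ret == 0` in `duplicated_*`). In plain-Python terms, the object-dtype counting loop in `build_count_table_*` amounts to roughly this sketch (a dict analogy, not the real hash table):

    def value_counts_sketch(values, dropna=True):
        # Plain-Python analogue of the object-dtype counting loop above:
        # a kh_get_* hit maps to "key in counts"; a kh_put_* of a new key
        # maps to "counts[key] = 1".  checknull is reduced to a None test.
        counts = {}
        for val in values:
            if dropna and val is None:
                continue
            if val in counts:
                counts[val] += 1
            else:
                counts[val] = 1
        return counts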

Diff for: pandas/_libs/join.pyx (+2 -2)

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

 import numpy as np
 cimport numpy as cnp
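
The import swap above works because Cython treats the `cython` module specially: `Py_ssize_t` and the directive decorators are available through a plain Python-level `import cython` / `from cython import ...` just as they are through `cimport`, so a module that only needs those names can use the ordinary import form. A minimal sketch, assuming the file is still compiled by Cython:

    import cython
    from cython import Py_ssize_t


    @cython.boundscheck(False)
    @cython.wraparound(False)
    def count_positive(values):
        # Py_ssize_t obtained through the Python-level import still works
        # as a C integer type once this module is compiled by Cython.
        cdef Py_ssize_t i, n = len(values), total = 0
        for i in range(n):
            if values[i] > 0:
                total += 1
        return total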

0 commit comments
