@@ -21,10 +21,9 @@ from numpy cimport (
2121cnp.import_array()
2222
2323from pandas._libs.algos import (
24- ensure_int64,
25- ensure_platform_int,
2624 groupsort_indexer,
2725 take_1d_int64_int64,
26+ take_1d_intp_intp,
2827)
2928
3029
@@ -34,16 +33,16 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
3433 cdef:
3534 Py_ssize_t i, j, k, count = 0
3635 ndarray[intp_t] left_sorter, right_sorter
37- ndarray[int64_t ] left_count, right_count
38- ndarray[int64_t ] left_indexer, right_indexer
39- int64_t lc, rc
36+ ndarray[intp_t ] left_count, right_count
37+ ndarray[intp_t ] left_indexer, right_indexer
38+ intp_t lc, rc
4039 Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
4140 Py_ssize_t offset
4241
4342 # NA group in location 0
4443
45- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
46- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
44+ left_sorter, left_count = groupsort_indexer(left, max_groups)
45+ right_sorter, right_count = groupsort_indexer(right, max_groups)
4746
4847 with nogil:
4948 # First pass, determine size of result set, do not use the NA group
@@ -58,8 +57,8 @@ def inner_join(const intp_t[:] left, const intp_t[:] right,
5857 left_pos = left_count[0 ]
5958 right_pos = right_count[0 ]
6059
61- left_indexer = np.empty(count, dtype = np.int64 )
62- right_indexer = np.empty(count, dtype = np.int64 )
60+ left_indexer = np.empty(count, dtype = np.intp )
61+ right_indexer = np.empty(count, dtype = np.intp )
6362
6463 with nogil:
6564 for i in range (1 , max_groups + 1 ):
@@ -85,17 +84,17 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
8584 Py_ssize_t max_groups , bint sort = True ):
8685 cdef:
8786 Py_ssize_t i, j, k, count = 0
88- ndarray[int64_t ] left_count, right_count
87+ ndarray[intp_t ] left_count, right_count
8988 ndarray[intp_t] rev, left_sorter, right_sorter
90- ndarray[int64_t ] left_indexer, right_indexer
91- int64_t lc, rc
89+ ndarray[intp_t ] left_indexer, right_indexer
90+ intp_t lc, rc
9291 Py_ssize_t loc, left_pos = 0 , right_pos = 0 , position = 0
9392 Py_ssize_t offset
9493
9594 # NA group in location 0
9695
97- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
98- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
96+ left_sorter, left_count = groupsort_indexer(left, max_groups)
97+ right_sorter, right_count = groupsort_indexer(right, max_groups)
9998
10099 with nogil:
101100 # First pass, determine size of result set, do not use the NA group
@@ -109,8 +108,8 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
109108 left_pos = left_count[0 ]
110109 right_pos = right_count[0 ]
111110
112- left_indexer = np.empty(count, dtype = np.int64 )
113- right_indexer = np.empty(count, dtype = np.int64 )
111+ left_indexer = np.empty(count, dtype = np.intp )
112+ right_indexer = np.empty(count, dtype = np.intp )
114113
115114 with nogil:
116115 for i in range (1 , max_groups + 1 ):
@@ -142,11 +141,10 @@ def left_outer_join(const intp_t[:] left, const intp_t[:] right,
142141 # this is a short-cut to avoid groupsort_indexer
143142 # otherwise, the `else` path also works in this case
144143 rev = np.empty(len (left), dtype = np.intp)
145- rev.put(ensure_platform_int( left_sorter) , np.arange(len (left)))
144+ rev.put(left_sorter, np.arange(len (left)))
146145 else :
147146 rev, _ = groupsort_indexer(left_indexer, len (left))
148147
149- rev = ensure_platform_int(rev)
150148 right_indexer = right_indexer.take(rev)
151149 left_indexer = left_indexer.take(rev)
152150
@@ -159,16 +157,16 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
159157 cdef:
160158 Py_ssize_t i, j, k, count = 0
161159 ndarray[intp_t] left_sorter, right_sorter
162- ndarray[int64_t ] left_count, right_count
163- ndarray[int64_t ] left_indexer, right_indexer
164- int64_t lc, rc
165- int64_t left_pos = 0 , right_pos = 0
160+ ndarray[intp_t ] left_count, right_count
161+ ndarray[intp_t ] left_indexer, right_indexer
162+ intp_t lc, rc
163+ intp_t left_pos = 0 , right_pos = 0
166164 Py_ssize_t offset, position = 0
167165
168166 # NA group in location 0
169167
170- left_sorter, left_count = groupsort_indexer(ensure_int64( left) , max_groups)
171- right_sorter, right_count = groupsort_indexer(ensure_int64( right) , max_groups)
168+ left_sorter, left_count = groupsort_indexer(left, max_groups)
169+ right_sorter, right_count = groupsort_indexer(right, max_groups)
172170
173171 with nogil:
174172 # First pass, determine size of result set, do not use the NA group
@@ -185,8 +183,8 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
185183 left_pos = left_count[0 ]
186184 right_pos = right_count[0 ]
187185
188- left_indexer = np.empty(count, dtype = np.int64 )
189- right_indexer = np.empty(count, dtype = np.int64 )
186+ left_indexer = np.empty(count, dtype = np.intp )
187+ right_indexer = np.empty(count, dtype = np.intp )
190188
191189 with nogil:
192190 for i in range (1 , max_groups + 1 ):
@@ -217,19 +215,17 @@ def full_outer_join(const intp_t[:] left, const intp_t[:] right,
217215 _get_result_indexer(right_sorter, right_indexer))
218216
219217
220- cdef ndarray[int64_t ] _get_result_indexer(
221- ndarray[intp_t] sorter, ndarray[int64_t ] indexer
218+ cdef ndarray[intp_t ] _get_result_indexer(
219+ ndarray[intp_t] sorter, ndarray[intp_t ] indexer
222220):
223221 if len (sorter) > 0 :
224222 # cython-only equivalent to
225223 # `res = algos.take_nd(sorter, indexer, fill_value=-1)`
226- res = np.empty(len (indexer), dtype = np.int64)
227- take_1d_int64_int64(ensure_int64(sorter), ensure_platform_int(indexer), res, - 1 )
228- # FIXME: sorter is intp_t, not int64_t, opposite for indexer;
229- # will this break on 32bit builds?
224+ res = np.empty(len (indexer), dtype = np.intp)
225+ take_1d_intp_intp(sorter, indexer, res, - 1 )
230226 else :
231227 # length-0 case
232- res = np.empty(len (indexer), dtype = np.int64 )
228+ res = np.empty(len (indexer), dtype = np.intp )
233229 res[:] = - 1
234230
235231 return res
0 commit comments