From 5a4e242325ccaff1d0665a4fdcff6d04427c8e63 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 22 Jul 2019 17:25:56 -0700 Subject: [PATCH 1/2] CLN: simplify join take call --- pandas/_libs/join.pyx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index f9e1ebb11116b..98c67bcb9c178 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -8,8 +8,9 @@ from numpy cimport (ndarray, uint32_t, uint64_t, float32_t, float64_t) cnp.import_array() -from pandas._libs.algos import groupsort_indexer, ensure_platform_int -from pandas.core.algorithms import take_nd +from pandas._libs.algos import ( + groupsort_indexer, ensure_platform_int, take_1d_int64_int64 +) def inner_join(const int64_t[:] left, const int64_t[:] right, @@ -67,8 +68,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right, Py_ssize_t max_groups, sort=True): cdef: Py_ssize_t i, j, k, count = 0 - ndarray[int64_t] left_count, right_count - ndarray left_sorter, right_sorter, rev + ndarray[int64_t] left_count, right_count, left_sorter, right_sorter + ndarray rev ndarray[int64_t] left_indexer, right_indexer int64_t lc, rc @@ -201,9 +202,12 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right, _get_result_indexer(right_sorter, right_indexer)) -def _get_result_indexer(sorter, indexer): +cdef _get_result_indexer(ndarray[int64_t] sorter, ndarray[int64_t] indexer): if len(sorter) > 0: - res = take_nd(sorter, indexer, fill_value=-1) + # cython-only equivalent to + # `res = algos.take_nd(sorter, indexer, fill_value=-1)` + res = np.empty(len(indexer), dtype=np.int64) + take_1d_int64_int64(sorter, indexer, res, -1) else: # length-0 case res = np.empty(len(indexer), dtype=np.int64) From 5d58ea7b76c618ebb2ad82df7f54f05a2c28c207 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Jul 2019 14:48:37 -0700 Subject: [PATCH 2/2] avoid TypeError --- pandas/_libs/join.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 98c67bcb9c178..238bfd0be0aa7 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -125,10 +125,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right, # no multiple matches for any row on the left # this is a short-cut to avoid groupsort_indexer # otherwise, the `else` path also works in this case - left_sorter = ensure_platform_int(left_sorter) - rev = np.empty(len(left), dtype=np.intp) - rev.put(left_sorter, np.arange(len(left))) + rev.put(ensure_platform_int(left_sorter), np.arange(len(left))) else: rev, _ = groupsort_indexer(left_indexer, len(left))