From 800824cc048edbdfade168b351d9000b45f54974 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 28 Jun 2024 10:12:16 -0700 Subject: [PATCH 1/2] Initial commit --- python/cudf/cudf/_lib/lists.pyx | 18 +++----------- .../libcudf/lists/count_elements.pxd | 2 +- python/cudf/cudf/_lib/pylibcudf/lists.pxd | 2 ++ python/cudf/cudf/_lib/pylibcudf/lists.pyx | 24 +++++++++++++++++++ .../cudf/cudf/pylibcudf_tests/test_lists.py | 10 ++++++++ 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 0ad09dba717..047ed27329a 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -9,9 +9,6 @@ from libcpp.utility cimport move from cudf._lib.column cimport Column from cudf._lib.pylibcudf.libcudf.column.column cimport column from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view -from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport ( - count_elements as cpp_count_elements, -) from cudf._lib.pylibcudf.libcudf.lists.extract cimport extract_list_element from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( lists_column_view, @@ -38,19 +35,10 @@ from cudf._lib.pylibcudf cimport Scalar @acquire_spill_lock() def count_elements(Column col): - - # shared_ptr required because lists_column_view has no default - # ctor - cdef shared_ptr[lists_column_view] list_view = ( - make_shared[lists_column_view](col.view()) + return Column.from_pylibcudf( + pylibcudf.lists.count_elements( + col.to_pylibcudf(mode="read")) ) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_count_elements(list_view.get()[0])) - - result = Column.from_unique_ptr(move(c_result)) - return result @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd index 38bdd4db0bb..ba57a839fbc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/count_elements.pxd @@ -9,4 +9,4 @@ from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( cdef extern from "cudf/lists/count_elements.hpp" namespace "cudf::lists" nogil: - cdef unique_ptr[column] count_elements(const lists_column_view) except + + cdef unique_ptr[column] count_elements(const lists_column_view&) except + diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/cudf/cudf/_lib/pylibcudf/lists.pxd index 2ccf0139e90..2ea83cc4c41 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pxd @@ -23,3 +23,5 @@ cpdef Column contains(Column, ColumnOrScalar) cpdef Column contains_nulls(Column) cpdef Column index_of(Column, ColumnOrScalar, bool) + +cpdef Column count_elements(Column) diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx index a94d940accd..16852e58ae6 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx @@ -15,6 +15,9 @@ from cudf._lib.pylibcudf.libcudf.lists.combine cimport ( concatenate_null_policy, concatenate_rows as cpp_concatenate_rows, ) +from cudf._lib.pylibcudf.libcudf.lists.count_elements cimport ( + count_elements as cpp_count_elements, +) from cudf._lib.pylibcudf.libcudf.table.table cimport table from cudf._lib.pylibcudf.libcudf.types cimport size_type from cudf._lib.pylibcudf.lists cimport ColumnOrScalar @@ -206,3 +209,24 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o find_option, )) return Column.from_libcudf(move(c_result)) + + +cpdef Column count_elements(Column input): + """Count the number of rows in each + list element in the given lists column. + Parameters + ---------- + input : Column + The input column + Returns + ------- + Column + A new Column of the lengths of each list element + """ + cdef ListColumnView list_view = input.list_view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_count_elements(list_view.view())) + + return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/cudf/cudf/pylibcudf_tests/test_lists.py index c781126e388..13570a48cc2 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_lists.py +++ b/python/cudf/cudf/pylibcudf_tests/test_lists.py @@ -134,3 +134,13 @@ def test_index_of_list_column(test_data, column): expect = pa.array(column[1], type=pa.int32()) assert_column_eq(expect, res) + + +def test_count_elements(test_data): + arr = pa.array(test_data[0][1]) + plc_column = plc.interop.from_arrow(arr) + res = plc.lists.count_elements(plc_column) + + expect = pa.array([1, 1, 0, 3], type=pa.int32()) + + assert_column_eq(expect, res) From c0c33888e5a5399ca48db9d39f11fba681d792f7 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 1 Jul 2024 13:21:29 -0700 Subject: [PATCH 2/2] Fix doc string --- python/cudf/cudf/_lib/pylibcudf/lists.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx index 16852e58ae6..904dd1df264 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx @@ -214,10 +214,14 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o cpdef Column count_elements(Column input): """Count the number of rows in each list element in the given lists column. + + For details, see :cpp:func:`count_elements`. + Parameters ---------- input : Column The input column + Returns ------- Column