Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.utils usage in favor of pylibcudf #2082

Merged
merged 3 commits on Dec 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions python/morpheus/morpheus/_lib/cudf_helpers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools

import cudf
from cudf.core.column import ColumnBase
from cudf.core.dtypes import StructDtype

from libcpp.string cimport string
Expand All @@ -26,8 +29,6 @@ from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.column cimport Column
from cudf._lib.utils cimport data_from_unique_ptr
from cudf._lib.utils cimport table_view_from_table

##### THE FOLLOWING CODE IS COPIED FROM CUDF AND SHOULD BE REMOVED WHEN UPDATING TO cudf>=24.12 #####
# see https://github.com/rapidsai/cudf/pull/17193 for details
Expand All @@ -39,6 +40,7 @@ cimport pylibcudf.libcudf.copying as cpp_copying
from pylibcudf.libcudf.column.column_view cimport column_view
from libcpp.memory cimport make_unique, unique_ptr
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf cimport Table as plc_Table
from cudf._lib.scalar cimport DeviceScalar

# imports needed for from_column_view_with_fix
Expand Down Expand Up @@ -289,8 +291,35 @@ cdef public api:
index_names = schema_infos[0:index_col_count] if index_col_count > 0 else None
column_names = schema_infos[index_col_count:]

data, index = data_from_unique_ptr(move(table.tbl), column_names=column_names, index_names=index_names)
plc_table = plc_Table.from_libcudf(move(table.tbl))

if index_names is None:
index = None
data = {
col_name: ColumnBase.from_pylibcudf(col)
for col_name, col in zip(
column_names, plc_table.columns()
)
}
else:
result_columns = [
ColumnBase.from_pylibcudf(col)
for col in plc_table.columns()
]
index = cudf.Index._from_data(
dict(
zip(
index_names,
result_columns[: len(index_names)],
)
)
)
data = dict(
zip(
column_names,
result_columns[len(index_names) :],
)
)
df = cudf.DataFrame._from_data(data, index)

# Update the struct field names after the DataFrame is created
Expand Down Expand Up @@ -356,7 +385,13 @@ cdef public api:

cdef vector[string] temp_col_names = get_column_names(table, True)

cdef table_view input_table_view = table_view_from_table(table, ignore_index=False)
cdef plc_Table plc_table = plc_Table(
[
col.to_pylibcudf(mode="read")
for col in itertools.chain(table.index._columns, table._columns)
]
)
cdef table_view input_table_view = plc_table.view()
cdef vector[string] index_names
cdef vector[string] column_names

Expand Down
Loading