Skip to content

Commit

Permalink
Remove cudf._lib.concat in favor of inlining pylibcudf (#17344)
Browse files Browse the repository at this point in the history
Contributes to #17317

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #17344
  • Loading branch information
mroeschke authored Nov 18, 2024
1 parent 18b40dc commit ba21673
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 47 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ set(cython_sources
aggregation.pyx
binaryop.pyx
column.pyx
-concat.pyx
copying.pyx
csv.pyx
datetime.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from . import (
binaryop,
-concat,
copying,
csv,
datetime,
Expand Down
35 changes: 0 additions & 35 deletions python/cudf/cudf/_lib/concat.pyx

This file was deleted.

2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view

cdef data_from_unique_ptr(
unique_ptr[table] c_tbl, column_names, index_names=*)
-cdef data_from_pylibcudf_table(tbl, column_names, index_names=*)
+cpdef data_from_pylibcudf_table(tbl, column_names, index_names=*)
cpdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *)
cdef data_from_table_view(
table_view tv, object owner, object column_names, object index_names=*)
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ cdef data_from_unique_ptr(
)


-cdef data_from_pylibcudf_table(tbl, column_names, index_names=None):
+cpdef data_from_pylibcudf_table(tbl, column_names, index_names=None):
return _data_from_columns(
columns_from_pylibcudf_table(tbl),
column_names,
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,9 +1204,7 @@ def _concat(
elif newsize == 0:
codes_col = column.column_empty(0, head.codes.dtype, masked=True)
else:
-# Filter out inputs that have 0 length, then concatenate.
-codes = [o for o in codes if len(o)]
-codes_col = libcudf.concat.concat_columns(objs)
+codes_col = column.concat_columns(codes) # type: ignore[arg-type]

codes_col = as_unsigned_codes(
len(cats),
Expand Down
9 changes: 8 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
from typing_extensions import Self

import pylibcudf as plc
import rmm

import cudf
Expand Down Expand Up @@ -2300,4 +2301,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:
return column_empty(0, head.dtype, masked=True)

# Filter out inputs that have 0 length, then concatenate.
-return libcudf.concat.concat_columns([o for o in objs if len(o)])
+objs_with_len = [o for o in objs if len(o)]
+with acquire_spill_lock():
+    return Column.from_pylibcudf(
+        plc.concatenate.concatenate(
+            [col.to_pylibcudf(mode="read") for col in objs_with_len]
+        )
+    )
29 changes: 25 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1787,11 +1787,32 @@ def _concat(
)

# Concatenate the Tables
-out = cls._from_data(
-    *libcudf.concat.concat_tables(
-        tables, ignore_index=ignore_index or are_all_range_index
+ignore = ignore_index or are_all_range_index
+index_names = None if ignore else tables[0]._index_names
+column_names = tables[0]._column_names
+with acquire_spill_lock():
+    plc_tables = [
+        plc.Table(
+            [
+                c.to_pylibcudf(mode="read")
+                for c in (
+                    table._columns
+                    if ignore
+                    else itertools.chain(
+                        table._index._columns, table._columns
+                    )
+                )
+            ]
+        )
+        for table in tables
+    ]
+
+    concatted = libcudf.utils.data_from_pylibcudf_table(
+        plc.concatenate.concatenate(plc_tables),
+        column_names=column_names,
+        index_names=index_names,
     )
-)
+out = cls._from_data(*concatted)

# If ignore_index is True, all input frames are empty, and at
# least one input frame has an index, assign a new RangeIndex
Expand Down

0 comments on commit ba21673

Please sign in to comment.