Skip to content

Commit

Permalink
Add to_dlpack/from_dlpack APIs to pylibcudf (#17055)
Browse files Browse the repository at this point in the history
Contributes to #15162

Could use some advice on how to type the input of `from_dlpack` and the output of `to_dlpack`, which are PyCapsule objects.
EDIT: I notice Cython just types them as object: https://github.com/cython/cython/blob/master/Cython/Includes/cpython/pycapsule.pxd. Stylistically, do we want to add `object var_name` or just leave them untyped?

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #17055
  • Loading branch information
mroeschke authored Oct 25, 2024
1 parent 5cba4fb commit 8bc9f19
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 66 deletions.
69 changes: 8 additions & 61 deletions python/cudf/cudf/_lib/interop.pyx
Original file line number Diff line number Diff line change
@@ -1,49 +1,22 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from cpython cimport pycapsule
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

import pylibcudf

from pylibcudf.libcudf.interop cimport (
DLManagedTensor,
from_dlpack as cpp_from_dlpack,
to_dlpack as cpp_to_dlpack,
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view

from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
columns_from_unique_ptr,
table_view_from_columns,
)
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf.core.buffer import acquire_spill_lock
from cudf.core.dtypes import ListDtype, StructDtype


def from_dlpack(dlpack_capsule):
def from_dlpack(object dlpack_capsule):
"""
Converts a DLPack Tensor PyCapsule into a list of columns.
DLPack Tensor PyCapsule is expected to have the name "dltensor".
"""
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>pycapsule.\
PyCapsule_GetPointer(dlpack_capsule, 'dltensor')
pycapsule.PyCapsule_SetName(dlpack_capsule, 'used_dltensor')

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_from_dlpack(dlpack_tensor)
)

res = columns_from_unique_ptr(move(c_result))
dlpack_tensor.deleter(dlpack_tensor)
return res
return columns_from_pylibcudf_table(
pylibcudf.interop.from_dlpack(dlpack_capsule)
)


def to_dlpack(list source_columns):
Expand All @@ -52,39 +25,13 @@ def to_dlpack(list source_columns):
DLPack Tensor PyCapsule will have the name "dltensor".
"""
if any(column.null_count for column in source_columns):
raise ValueError(
"Cannot create a DLPack tensor with null values. \
Input is required to have null count as zero."
)

cdef DLManagedTensor *dlpack_tensor
cdef table_view source_table_view = table_view_from_columns(source_columns)

with nogil:
dlpack_tensor = cpp_to_dlpack(
source_table_view
return pylibcudf.interop.to_dlpack(
pylibcudf.Table(
[col.to_pylibcudf(mode="read") for col in source_columns]
)

return pycapsule.PyCapsule_New(
dlpack_tensor,
'dltensor',
dlmanaged_tensor_pycapsule_deleter
)


cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept:
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>0
try:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'used_dltensor')
return # we do not call a used capsule's deleter
except Exception:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'dltensor')
dlpack_tensor.deleter(dlpack_tensor)


def gather_metadata(object cols_dtypes):
"""
Generates a ColumnMetadata vector for each column.
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
expressions,
filling,
groupby,
interop,
join,
json,
labeling,
Expand Down Expand Up @@ -62,6 +63,7 @@ __all__ = [
"filling",
"gpumemoryview",
"groupby",
"interop",
"join",
"json",
"lists",
Expand Down
8 changes: 8 additions & 0 deletions python/pylibcudf/pylibcudf/interop.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.table cimport Table


# Build a Table from a DLPack PyCapsule (PyCapsules are typed as `object`).
cpdef Table from_dlpack(object managed_tensor)

# Export a Table as a DLPack PyCapsule (returned as a plain `object`).
cpdef object to_dlpack(Table input)
94 changes: 93 additions & 1 deletion python/pylibcudf/pylibcudf/interop.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New
from cpython.pycapsule cimport (
PyCapsule_GetPointer,
PyCapsule_IsValid,
PyCapsule_New,
PyCapsule_SetName,
)
from libc.stdlib cimport free
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
Expand All @@ -16,11 +21,14 @@ from pylibcudf.libcudf.interop cimport (
ArrowArray,
ArrowArrayStream,
ArrowSchema,
DLManagedTensor,
column_metadata,
from_arrow_column as cpp_from_arrow_column,
from_arrow_stream as cpp_from_arrow_stream,
from_dlpack as cpp_from_dlpack,
to_arrow_host_raw,
to_arrow_schema_raw,
to_dlpack as cpp_to_dlpack,
)
from pylibcudf.libcudf.table.table cimport table

Expand Down Expand Up @@ -315,3 +323,87 @@ def _to_arrow_scalar(cudf_object, metadata=None):
# Note that metadata for scalars is primarily important for preserving
# information on nested types since names are otherwise irrelevant.
return to_arrow(Column.from_scalar(cudf_object, 1), metadata=metadata)[0]


cpdef Table from_dlpack(object managed_tensor):
    """
    Convert a DLPack DLTensor into a cudf table.

    For details, see :cpp:func:`cudf::from_dlpack`

    Parameters
    ----------
    managed_tensor : PyCapsule
        A 1D or 2D column-major (Fortran order) tensor. The capsule must
        be named "dltensor"; it is renamed to "used_dltensor" once it has
        been consumed by this function.

    Returns
    -------
    Table
        Table with a copy of the tensor data.

    Raises
    ------
    ValueError
        If ``managed_tensor`` is not a valid "dltensor" PyCapsule, or if
        the capsule wraps a NULL pointer.
    """
    if not PyCapsule_IsValid(managed_tensor, "dltensor"):
        raise ValueError("Invalid PyCapsule object")
    cdef unique_ptr[table] c_result
    cdef Table result
    cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>PyCapsule_GetPointer(
        managed_tensor, "dltensor"
    )
    if dlpack_tensor is NULL:
        raise ValueError("PyCapsule object contained a NULL pointer")
    # From here on this function owns the tensor: the capsule destructor
    # skips capsules named "used_dltensor".
    PyCapsule_SetName(managed_tensor, "used_dltensor")

    # Note: A copy is always performed when converting the dlpack
    # data to a libcudf table. We also delete the dlpack_tensor pointer
    # as the pointer is not deleted by libcudf's from_dlpack function.
    # TODO: https://github.com/rapidsai/cudf/issues/10874
    # TODO: https://github.com/rapidsai/cudf/issues/10849
    try:
        with nogil:
            c_result = cpp_from_dlpack(dlpack_tensor)

        result = Table.from_libcudf(move(c_result))
    finally:
        # Release the tensor even when the conversion raises; since the
        # capsule was already renamed, nothing else would free it.
        dlpack_tensor.deleter(dlpack_tensor)
    return result


cpdef object to_dlpack(Table input):
    """
    Convert a cudf table into a DLPack DLTensor.

    For details, see :cpp:func:`cudf::to_dlpack`

    Parameters
    ----------
    input : Table
        The table to convert. None of its columns may contain nulls.

    Returns
    -------
    PyCapsule
        A capsule named "dltensor" wrapping a 1D or 2D DLPack tensor with
        a copy of the table data.

    Raises
    ------
    ValueError
        If any column of ``input`` has a non-zero null count, or if
        libcudf produces no tensor (a NULL pointer) for ``input``.
    """
    for col in input._columns:
        if col.null_count():
            raise ValueError(
                "Cannot create a DLPack tensor with null values. "
                "Input is required to have null count as zero."
            )
    cdef DLManagedTensor *dlpack_tensor

    with nogil:
        dlpack_tensor = cpp_to_dlpack(input.view())

    # libcudf may hand back a null tensor instead of raising; report that
    # explicitly rather than relying on PyCapsule_New's generic
    # "called with null pointer" ValueError.
    if dlpack_tensor is NULL:
        raise ValueError(
            "Cannot create a DLPack tensor: libcudf returned a null tensor "
            "for the given table."
        )

    # The capsule destructor frees the tensor if no consumer takes it.
    return PyCapsule_New(
        dlpack_tensor,
        "dltensor",
        dlmanaged_tensor_pycapsule_deleter
    )


cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept:
    # PyCapsule destructor for capsules created by to_dlpack.
    #
    # A capsule renamed to "used_dltensor" is left untouched here. Otherwise
    # the tensor stored under the name "dltensor" is released through its own
    # deleter callback.
    cdef DLManagedTensor* tensor = NULL
    if not PyCapsule_IsValid(pycap_obj, "used_dltensor"):
        tensor = <DLManagedTensor*>PyCapsule_GetPointer(pycap_obj, "dltensor")
        if tensor is not NULL:
            tensor.deleter(tensor)
10 changes: 6 additions & 4 deletions python/pylibcudf/pylibcudf/libcudf/interop.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ cdef extern from "cudf/interop.hpp" nogil:

cdef extern from "cudf/interop.hpp" namespace "cudf" \
nogil:
cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor
) except +
cdef unique_ptr[table] from_dlpack(
const DLManagedTensor* managed_tensor
) except +

DLManagedTensor* to_dlpack(table_view input_table
) except +
DLManagedTensor* to_dlpack(
const table_view& input
) except +

cdef cppclass column_metadata:
column_metadata() except +
Expand Down
31 changes: 31 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_interop.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import cupy as cp
import numpy as np
import pyarrow as pa
import pytest
from utils import assert_table_eq

import pylibcudf as plc

Expand Down Expand Up @@ -67,3 +70,31 @@ def test_decimal_other(data_type):

arrow_type = plc.interop.to_arrow(data_type, precision=precision)
assert arrow_type == pa.decimal128(precision, 0)


def test_round_trip_dlpack_plc_table():
    """Round-trip a two-column table through a DLPack capsule."""
    expected = pa.table({"a": [1, 2, 3], "b": [5, 6, 7]})
    capsule = plc.interop.to_dlpack(plc.interop.from_arrow(expected))
    round_tripped = plc.interop.from_dlpack(capsule)
    assert_table_eq(expected, round_tripped)


@pytest.mark.parametrize("array", [np.array, cp.array])
def test_round_trip_dlpack_array(array):
    """Import a 1D NumPy/CuPy array's DLPack capsule as a one-column table."""
    values = [1, 2, 3]
    expected = pa.table({"a": values})
    arr = array(values)
    capsule = arr.__dlpack__()
    assert_table_eq(expected, plc.interop.from_dlpack(capsule))


def test_to_dlpack_error():
    """to_dlpack must reject a table that contains null values."""
    plc_table = plc.interop.from_arrow(
        pa.table({"a": [1, None, 3], "b": [5, 6, 7]})
    )
    # Call only to_dlpack so the test pins the error to the function
    # under test rather than to a surrounding from_dlpack call.
    with pytest.raises(ValueError, match="Cannot create a DLPack tensor"):
        plc.interop.to_dlpack(plc_table)


def test_from_dlpack_error():
    """from_dlpack must reject an object that is not a DLPack capsule."""
    not_a_capsule = 1
    with pytest.raises(ValueError, match="Invalid PyCapsule object"):
        plc.interop.from_dlpack(not_a_capsule)

0 comments on commit 8bc9f19

Please sign in to comment.