Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add cuda backend support for to_raggedtensor and from_raggedtensor functions #3263

Merged
8 changes: 8 additions & 0 deletions docs/reference/toctree.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
generated/ak.to_feather
generated/ak.from_avro_file

.. toctree::
:caption: Conversions for machine learning

generated/ak.from_raggedtensor
generated/ak.to_raggedtensor
generated/ak.from_torch
generated/ak.to_torch

.. toctree::
:caption: Converting to Pandas DataFrames

Expand Down
42 changes: 39 additions & 3 deletions src/awkward/operations/ak_from_raggedtensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from __future__ import annotations

import re

import awkward as ak
from awkward._dispatch import high_level_function

Expand Down Expand Up @@ -30,18 +32,25 @@ def from_raggedtensor(array):
def _impl(array):
try:
# get the flat values
content = array.flat_values.numpy()
content = array.flat_values
except AttributeError as err:
raise TypeError(
"""only RaggedTensor can be converted to awkward array"""
) from err
# convert them to ak.contents right away

# handle gpu and cpu instances separately
device = content.backing_device

content = _tensor_to_np_or_cp(content, device)

# convert flat_values to ak.contents right away
content = ak.contents.NumpyArray(content)

# get the offsets
offsets_arr = []
for splits in array.nested_row_splits:
split = splits.numpy()
# handle gpu and cpu instances separately
split = _tensor_to_np_or_cp(splits, device)
# convert to ak.index
offset = ak.index.Index64(split)
offsets_arr.append(offset)
Expand All @@ -55,6 +64,33 @@ def _impl(array):
return ak.Array(_recursive_call(content, offsets_arr, 0))


def _tensor_to_np_or_cp(array, device):
    """Convert a TensorFlow tensor into a NumPy or CuPy array.

    The target library is selected from the TensorFlow device string.

    Args:
        array: The tensor to convert. On the CPU path its ``numpy()`` method
            is called; on the GPU path it is exported through DLPack.
        device: A device string containing ``:CPU:<n>`` or ``:GPU:<n>``
            (callers in this module pass a tensor's ``backing_device``).

    Returns:
        The result of ``array.numpy()`` for a CPU device, or the array that
        CuPy builds from the tensor's DLPack capsule for a GPU device.

    Raises:
        NotImplementedError: If ``device`` does not match the expected
            ``...:(CPU|GPU):<index>`` pattern.
        ImportError: If the GPU path is taken and ``tensorflow`` cannot be
            imported.
    """
    device_match = re.match(".*:(CPU|GPU):[0-9]+", device)
    if device_match is None:
        raise NotImplementedError(
            f"TensorFlow device has an unexpected format: {device!r}"
        )

    device_kind = device_match.group(1)

    if device_kind == "CPU":
        return array.numpy()

    # The pattern admits only "CPU" or "GPU", so from here on it is the GPU path.
    try:
        import tensorflow as tf
    except ImportError as err:
        raise ImportError(
            """to use ak.from_raggedtensor, you must install the 'tensorflow' package with:

pip install tensorflow
or
conda install tensorflow"""
        ) from err

    from awkward._nplikes.cupy import Cupy

    cupy_like = Cupy.instance()
    # Hand the tensor over via a DLPack capsule instead of going through .numpy().
    capsule = tf.experimental.dlpack.to_dlpack(array)
    return cupy_like.from_dlpack(capsule)


def _recursive_call(content, offsets_arr, count):
if count == len(offsets_arr) - 2:
return ak.contents.ListOffsetArray(
Expand Down
56 changes: 49 additions & 7 deletions src/awkward/operations/ak_to_raggedtensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

import awkward as ak
from awkward._dispatch import high_level_function
from awkward._nplikes.numpy_like import NumpyMetadata

__all__ = ("to_raggedtensor",)

np = NumpyMetadata.instance()


@high_level_function()
def to_raggedtensor(array):
Expand Down Expand Up @@ -45,14 +48,49 @@ def _impl(array):
# also transforms a python list to awkward array
array = ak.to_layout(array, allow_record=False)

# keep the same device
ak_device = ak.backend(array)
if ak_device not in ["cuda", "cpu"]:
raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""")

if ak_device == "cpu":
device = "CPU:0"
else:
id = _find_innermost_content(array).data.device.id
device = "GPU:" + str(id)

with tf.device(device):
if isinstance(array, ak.contents.numpyarray.NumpyArray):
values = array.data
# handle cupy separately
values = _convert_to_tensor_if_cupy(values)
return tf.RaggedTensor.from_row_splits(
values=values, row_splits=[0, array.__len__()]
)

else:
flat_values, nested_row_splits = _recursive_call(array, ())
return tf.RaggedTensor.from_nested_row_splits(
flat_values, nested_row_splits
)


def _find_innermost_content(array):
if isinstance(array, ak.contents.numpyarray.NumpyArray):
return tf.RaggedTensor.from_row_splits(
values=array.data, row_splits=[0, array.__len__()]
)
return array
else:
return _find_innermost_content(array.content)


def _convert_to_tensor_if_cupy(array):
if isinstance(array, np.ndarray):
return array
else:
flat_values, nested_row_splits = _recursive_call(array, ())
# converts cupy directly to tensor,
# since `tf.RaggedTensor.from_nested_row_splits` cannot work with CuPy arrays
import tensorflow as tf

return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits)
return tf.experimental.dlpack.from_dlpack(array.toDlpack())


def _recursive_call(layout, offsets_arr):
Expand All @@ -75,10 +113,14 @@ def _recursive_call(layout, offsets_arr):
)

# recursively gather all of the offsets of an array
offsets_arr += (layout.offsets.data,)
offset = layout.offsets.data
offset = _convert_to_tensor_if_cupy(offset)
offsets_arr += (offset,)

except AttributeError:
# at the last iteration form a ragged tensor from the
# accumulated offsets and flattened values of the array
return layout.data, offsets_arr
data = layout.data
data = _convert_to_tensor_if_cupy(data)
return data, offsets_arr
return _recursive_call(layout.content, offsets_arr)
Loading