From 303f78ff5b0d918bc42cd6501bcbc7489e069e20 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 1 Oct 2024 17:51:50 +0300 Subject: [PATCH 1/9] add cuda backend support --- src/awkward/operations/ak_to_raggedtensor.py | 51 +++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index 5fcb2e2d5f..382380f746 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -4,9 +4,12 @@ import awkward as ak from awkward._dispatch import high_level_function +from awkward._nplikes.numpy_like import NumpyMetadata __all__ = ("to_raggedtensor",) +np = NumpyMetadata.instance() + @high_level_function() def to_raggedtensor(array): @@ -45,15 +48,39 @@ def _impl(array): # also transforms a python list to awkward array array = ak.to_layout(array, allow_record=False) - if isinstance(array, ak.contents.numpyarray.NumpyArray): - return tf.RaggedTensor.from_row_splits( - values=array.data, row_splits=[0, array.__len__()] - ) + # keep the same device + ak_device = ak.backend(array) + if ak_device not in ['cuda', 'cpu']: + raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") + + if ak_device == 'cpu': + device = 'CPU:0' else: - flat_values, nested_row_splits = _recursive_call(array, ()) + device = 'GPU:0' + + with tf.device(device): + if isinstance(array, ak.contents.numpyarray.NumpyArray): + values = array.data + # handle cupy separately + if not isinstance(array.data, np.ndarray): + values = _cupy_to_tensor(values) + + return tf.RaggedTensor.from_row_splits( + values=values, row_splits=[0, array.__len__()] + ) + + else: + flat_values, nested_row_splits = _recursive_call(array, ()) - return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) + ragged_tensor = tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) + print(ragged_tensor[0][0].device) + return ragged_tensor +def _cupy_to_tensor(cupy): + # converts cupy directly to tensor, + # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays + import tensorflow as tf + return tf.experimental.dlpack.from_dlpack(cupy.toDlpack()) def _recursive_call(layout, offsets_arr): try: @@ -75,10 +102,18 @@ def _recursive_call(layout, offsets_arr): ) # recursively gather all of the offsets of an array - offsets_arr += (layout.offsets.data,) + offset = layout.offsets.data + if isinstance(offset, np.ndarray): + offsets_arr += (offset,) + else: + offsets_arr += (_cupy_to_tensor(offset),) except AttributeError: # at the last iteration form a ragged tensor from the # accumulated offsets and flattened values of the array - return layout.data, offsets_arr + data = layout.data + if isinstance(data, np.ndarray): + return data, offsets_arr + else: + return _cupy_to_tensor(data), offsets_arr return _recursive_call(layout.content, offsets_arr) From 7f9bb748821ad8e995cbd67f6f0dcc31596d4891 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 1 Oct 2024 17:57:30 +0300 Subject: [PATCH 2/9] style changes --- src/awkward/operations/ak_to_raggedtensor.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index 382380f746..e4898f5a6b 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -50,13 +50,13 @@ def _impl(array): # keep the same device ak_device = ak.backend(array) - if ak_device not in ['cuda', 'cpu']: + if ak_device not in ["cuda", "cpu"]: raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") - if ak_device == 'cpu': - device = 'CPU:0' + if ak_device == "cpu": + device = "CPU:0" else: - device = 'GPU:0' + device = "GPU:0" with tf.device(device): if isinstance(array, ak.contents.numpyarray.NumpyArray): @@ -66,22 +66,27 @@ def _impl(array): values = _cupy_to_tensor(values) return tf.RaggedTensor.from_row_splits( - values=values, row_splits=[0, array.__len__()] + values=values, row_splits=[0, array.__len__()] ) else: flat_values, nested_row_splits = _recursive_call(array, ()) - ragged_tensor = tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) - print(ragged_tensor[0][0].device) + ragged_tensor = tf.RaggedTensor.from_nested_row_splits( + flat_values, nested_row_splits + ) + # print(ragged_tensor[0][0].device) return ragged_tensor + def _cupy_to_tensor(cupy): # converts cupy directly to tensor, # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays import tensorflow as tf + return tf.experimental.dlpack.from_dlpack(cupy.toDlpack()) + def _recursive_call(layout, offsets_arr): try: # change all the possible layout types to ListOffsetArray From 4189a02c34690aae465f3665a56c9af6e3803e29 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 15 Oct 2024 16:42:34 +0300 Subject: [PATCH 3/9] keep gpu id the same --- src/awkward/operations/ak_to_raggedtensor.py | 51 ++++++++------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index e4898f5a6b..823efcfc39 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -50,42 +50,37 @@ def _impl(array): # keep the same device ak_device = ak.backend(array) - if ak_device not in ["cuda", "cpu"]: + if ak_device not in ['cuda', 'cpu']: raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") - if ak_device == "cpu": - device = "CPU:0" + if ak_device == 'cpu': + device = 'CPU:0' else: - device = "GPU:0" + _, depth = array.minmax_depth + id = array[depth-1].content.data.device.id + device = 'GPU:' + str(id) with tf.device(device): if isinstance(array, ak.contents.numpyarray.NumpyArray): values = array.data # handle cupy separately - if not isinstance(array.data, np.ndarray): - values = _cupy_to_tensor(values) - + values = _convert_to_tensor_if_cupy(values) return tf.RaggedTensor.from_row_splits( values=values, row_splits=[0, array.__len__()] ) else: flat_values, nested_row_splits = _recursive_call(array, ()) + return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) - ragged_tensor = tf.RaggedTensor.from_nested_row_splits( - flat_values, nested_row_splits - ) - # print(ragged_tensor[0][0].device) - return ragged_tensor - - -def _cupy_to_tensor(cupy): - # converts cupy directly to tensor, - # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays - import tensorflow as tf - - return tf.experimental.dlpack.from_dlpack(cupy.toDlpack()) - +def _convert_to_tensor_if_cupy(array): + if isinstance(array, np.ndarray): + return array + else: + # converts cupy directly to tensor, + # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays + import tensorflow as tf + return tf.experimental.dlpack.from_dlpack(array.toDlpack()) def _recursive_call(layout, offsets_arr): try: @@ -108,17 +103,13 @@ def _recursive_call(layout, offsets_arr): # recursively gather all of the offsets of an array offset = layout.offsets.data - if isinstance(offset, np.ndarray): - offsets_arr += (offset,) - else: - offsets_arr += (_cupy_to_tensor(offset),) + offset = _convert_to_tensor_if_cupy(offset) + offsets_arr += (offset,) except AttributeError: # at the last iteration form a ragged tensor from the # accumulated offsets and flattened values of the array data = layout.data - if isinstance(data, np.ndarray): - return data, offsets_arr - else: - return _cupy_to_tensor(data), offsets_arr - return _recursive_call(layout.content, offsets_arr) + data = _convert_to_tensor_if_cupy(data) + return data, offsets_arr + return _recursive_call(layout.content, offsets_arr) \ No newline at end of file From 27f9e4416e5a697f5959c6ad1308ae7676c4e653 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Tue, 15 Oct 2024 16:44:16 +0300 Subject: [PATCH 4/9] style changes --- src/awkward/operations/ak_to_raggedtensor.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index 823efcfc39..a3a532c197 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -50,15 +50,15 @@ def _impl(array): # keep the same device ak_device = ak.backend(array) - if ak_device not in ['cuda', 'cpu']: + if ak_device not in ["cuda", "cpu"]: raise ValueError("""Only 'cpu' and 'cuda' backend conversions are allowed""") - if ak_device == 'cpu': - device = 'CPU:0' + if ak_device == "cpu": + device = "CPU:0" else: _, depth = array.minmax_depth - id = array[depth-1].content.data.device.id - device = 'GPU:' + str(id) + id = array[depth - 1].content.data.device.id + device = "GPU:" + str(id) with tf.device(device): if isinstance(array, ak.contents.numpyarray.NumpyArray): @@ -71,7 +71,10 @@ def _impl(array): else: flat_values, nested_row_splits = _recursive_call(array, ()) - return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) + return tf.RaggedTensor.from_nested_row_splits( + flat_values, nested_row_splits + ) + def _convert_to_tensor_if_cupy(array): if isinstance(array, np.ndarray): @@ -80,8 +83,10 @@ def _convert_to_tensor_if_cupy(array): # converts cupy directly to tensor, # since `tf.RaggedTensor.from_nested_row_splits` can not work with Cupy arrays import tensorflow as tf + return tf.experimental.dlpack.from_dlpack(array.toDlpack()) + def _recursive_call(layout, offsets_arr): try: # change all the possible layout types to ListOffsetArray @@ -112,4 +117,4 @@ def _recursive_call(layout, offsets_arr): data = layout.data data = _convert_to_tensor_if_cupy(data) return data, offsets_arr - return _recursive_call(layout.content, offsets_arr) \ No newline at end of file + return _recursive_call(layout.content, offsets_arr) From e58772bfbebd241c8c90aaebc69428a60104e23b Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Wed, 16 Oct 2024 13:42:18 +0300 Subject: [PATCH 5/9] fix device id selection --- src/awkward/operations/ak_to_raggedtensor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index a3a532c197..0a8c797c63 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -56,8 +56,7 @@ def _impl(array): if ak_device == "cpu": device = "CPU:0" else: - _, depth = array.minmax_depth - id = array[depth - 1].content.data.device.id + id = _find_innermost_content(array).data.device.id device = "GPU:" + str(id) with tf.device(device): @@ -76,6 +75,13 @@ def _impl(array): ) +def _find_innermost_content(array): + if isinstance(array, ak.contents.numpyarray.NumpyArray): + return array + else: + return _find_innermost_content(array.content) + + def _convert_to_tensor_if_cupy(array): if isinstance(array, np.ndarray): return array From 22379220390a915984796611421af1eef92f2585 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Wed, 16 Oct 2024 13:48:49 +0300 Subject: [PATCH 6/9] add new functions to the documentation --- docs/reference/toctree.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 4b6ae1154d..0175459441 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -39,6 +39,14 @@ generated/ak.to_feather generated/ak.from_avro_file +.. toctree:: + :caption: Conversions for machine learning + + generated/ak.from_raggedtensor + generated/ak.to_raggedtensor + generated/ak.from_torch + generated/ak.to_torch + .. toctree:: :caption: Converting to Pandas DataFrames From e23f29aa38f30a6613d6fe99f77dbeebeb0e4885 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Wed, 16 Oct 2024 17:23:36 +0300 Subject: [PATCH 7/9] add cuda backend support for ak.from_raggedtensor --- .../operations/ak_from_raggedtensor.py | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py index 1c895506c2..69c3558281 100644 --- a/src/awkward/operations/ak_from_raggedtensor.py +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -28,20 +28,43 @@ def from_raggedtensor(array): def _impl(array): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.from_raggedtensor, you must install the 'tensorflow' package with: + + pip install tensorflow +or + conda install tensorflow""" + ) from err + try: # get the flat values - content = array.flat_values.numpy() + content = array.flat_values except AttributeError as err: raise TypeError( """only RaggedTensor can be converted to awkward array""" ) from err - # convert them to ak.contents right away + + # handle gpu and cpu instances separately + device = content.device + + # since TensorFlow currently does not support + # int32 variables being placed on the GPU, use CPU for them instead + if content.dtype == tf.int32: + device = "cpu" + + content = _tensor_to_np_or_cp(content, device) + + # convert flat_values to ak.contents right away content = ak.contents.NumpyArray(content) # get the offsets offsets_arr = [] for splits in array.nested_row_splits: - split = splits.numpy() + # handle gpu and cpu instances separately + split = _tensor_to_np_or_cp(splits, device) # convert to ak.index offset = ak.index.Index64(split) offsets_arr.append(offset) @@ -55,6 +78,18 @@ def _impl(array): return ak.Array(_recursive_call(content, offsets_arr, 0)) +def _tensor_to_np_or_cp(array, device): + import tensorflow as tf + + if "GPU" in device: + from awkward._nplikes.cupy import Cupy + + cp = Cupy.instance() + return cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array)) + else: + return array.numpy() + + def _recursive_call(content, offsets_arr, count): if count == len(offsets_arr) - 2: return ak.contents.ListOffsetArray( From 5cfccda310c0080fe2951a47298fcbfb4df72663 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Fri, 18 Oct 2024 15:20:52 +0300 Subject: [PATCH 8/9] add suggestions from Jim --- .../operations/ak_from_raggedtensor.py | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py index 69c3558281..4cd4c09e23 100644 --- a/src/awkward/operations/ak_from_raggedtensor.py +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -28,17 +28,6 @@ def from_raggedtensor(array): def _impl(array): - try: - import tensorflow as tf - except ImportError as err: - raise ImportError( - """to use ak.from_raggedtensor, you must install the 'tensorflow' package with: - - pip install tensorflow -or - conda install tensorflow""" - ) from err - try: # get the flat values content = array.flat_values @@ -48,12 +37,7 @@ def _impl(array): ) from err # handle gpu and cpu instances separately - device = content.device - - # since TensorFlow currently does not support - # int32 variables being placed on the GPU, use CPU for them instead - if content.dtype == tf.int32: - device = "cpu" + device = content.backing_device content = _tensor_to_np_or_cp(content, device) @@ -79,9 +63,18 @@ def _impl(array): def _tensor_to_np_or_cp(array, device): - import tensorflow as tf + if device.endswith("GPU", 0, -2): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.from_raggedtensor, you must install the 'tensorflow' package with: + + pip install tensorflow + or + conda install tensorflow""" + ) from err - if "GPU" in device: from awkward._nplikes.cupy import Cupy cp = Cupy.instance() From b6f2ea4f00542d4099926b896d239a74185a0f57 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk Date: Thu, 24 Oct 2024 18:03:42 +0300 Subject: [PATCH 9/9] add suggestions from Jim --- src/awkward/operations/ak_from_raggedtensor.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py index 4cd4c09e23..cadf1edb12 100644 --- a/src/awkward/operations/ak_from_raggedtensor.py +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -2,6 +2,8 @@ from __future__ import annotations +import re + import awkward as ak from awkward._dispatch import high_level_function @@ -63,7 +65,13 @@ def _impl(array): def _tensor_to_np_or_cp(array, device): - if device.endswith("GPU", 0, -2): + matched_device = re.match(".*:(CPU|GPU):[0-9]+", device) + + if matched_device is None: + raise NotImplementedError( + f"TensorFlow device has an unexpected format: {device!r}" + ) + elif matched_device.groups()[0] == "GPU": try: import tensorflow as tf except ImportError as err: @@ -79,7 +87,7 @@ def _tensor_to_np_or_cp(array, device): cp = Cupy.instance() return cp.from_dlpack(tf.experimental.dlpack.to_dlpack(array)) - else: + elif matched_device.groups()[0] == "CPU": return array.numpy()