Skip to content

Commit

Permalink
Implement rowsize_to_index (Cloud-Drift#273)
Browse files Browse the repository at this point in the history
* Implement rowsize_to_index

* Update analysis.py

expand docstring and add an example

* lint

---------

Co-authored-by: Shane Elipot <selipot@miami.edu>
Co-authored-by: Philippe Miron <philippe.miron@dtn.com>
  • Loading branch information
3 people committed Nov 16, 2023
1 parent d2c3861 commit adbf94d
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
31 changes: 29 additions & 2 deletions clouddrift/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,33 @@ def segment(
return np.concatenate(segment_sizes)


def rowsize_to_index(rowsize: Union[list, np.ndarray, xr.DataArray]) -> np.ndarray:
    """Convert a list of row sizes to a list of indices.

    This function is typically used to obtain the indices of data rows organized
    in a ragged array.

    Parameters
    ----------
    rowsize : list or np.ndarray or xr.DataArray
        A list of row sizes.

    Returns
    -------
    np.ndarray
        Array of ``len(rowsize) + 1`` indices: a leading 0 followed by the
        cumulative sum of ``rowsize``. Row ``n`` of the ragged array spans
        ``indices[n]`` (inclusive) to ``indices[n + 1]`` (exclusive). An empty
        ``rowsize`` yields ``array([0])``.

    Examples
    --------
    To obtain the indices within a ragged array of three consecutive rows of
    sizes 100, 202, and 53:

    >>> rowsize_to_index([100, 202, 53])
    array([  0, 100, 302, 355])
    """
    sizes = np.asarray(rowsize)
    if sizes.size == 0:
        # An empty sequence is converted to float64 by default, which would
        # produce float indices that cannot be used for slicing or as offsets.
        sizes = sizes.astype(int)
    # Prepend 0 so that the cumulative sum starts at the first row's offset.
    return np.cumsum(np.insert(sizes, 0, 0))


def position_from_velocity(
u: np.ndarray,
v: np.ndarray,
Expand Down Expand Up @@ -1023,7 +1050,7 @@ def subset(
raise ValueError(f"Unknown variable '{key}'.")

# remove data when trajectories are filtered
traj_idx = np.insert(np.cumsum(ds[rowsize_var_name].values), 0, 0)
traj_idx = rowsize_to_index(ds[rowsize_var_name].values)
for i in np.where(~mask_traj)[0]:
mask_obs[slice(traj_idx[i], traj_idx[i + 1])] = False

Expand Down Expand Up @@ -1093,5 +1120,5 @@ def unpack_ragged(
)):
u, v = velocity_from_position(lon, lat, time)
"""
indices = np.insert(np.cumsum(np.array(rowsize)), 0, 0)
indices = rowsize_to_index(rowsize)
return [ragged_array[indices[n] : indices[n + 1]] for n in range(indices.size - 1)]
5 changes: 3 additions & 2 deletions clouddrift/raggedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
Datasets and Awkward Arrays.
"""
import awkward as ak
from clouddrift.analysis import rowsize_to_index
import xarray as xr
import numpy as np
from collections.abc import Callable
Expand Down Expand Up @@ -316,7 +317,7 @@ def allocate(
ds = preprocess_func(indices[0], **kwargs)
nb_traj = len(rowsize)
nb_obs = np.sum(rowsize).astype("int")
index_traj = np.insert(np.cumsum(rowsize), 0, 0)
index_traj = rowsize_to_index(rowsize)

# allocate memory
coords = {}
Expand Down Expand Up @@ -410,7 +411,7 @@ def to_awkward(self):
ak.Array
Awkward Array containing the ragged array and its attributes
"""
index_traj = np.insert(np.cumsum(self.metadata["rowsize"]), 0, 0)
index_traj = rowsize_to_index(self.metadata["rowsize"])
offset = ak.index.Index64(index_traj)

data = []
Expand Down
12 changes: 12 additions & 0 deletions tests/analysis_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
position_from_velocity,
ragged_to_regular,
regular_to_ragged,
rowsize_to_index,
segment,
subset,
unpack_ragged,
Expand Down Expand Up @@ -889,3 +890,14 @@ def test_unpack_ragged(self):
self.assertTrue(
np.all([lon[n].size == ds["rowsize"][n] for n in range(len(lon))])
)


class rowsize_to_index_tests(unittest.TestCase):
    """Tests for ``rowsize_to_index``."""

    def test_rowsize_to_index(self):
        """Row sizes given as list, ndarray or DataArray give the same indices."""
        sizes = [2, 3, 4]
        indices = np.array([0, 2, 5, 9])
        # Exercise each input type accepted by the function's signature.
        variants = (sizes, np.array(sizes), xr.DataArray(data=sizes))
        for candidate in variants:
            self.assertTrue(np.all(rowsize_to_index(candidate) == indices))

0 comments on commit adbf94d

Please sign in to comment.