def rowsize_to_index(
    rowsize: Union[list, np.ndarray, "xr.DataArray"]
) -> np.ndarray:
    """Convert a sequence of row sizes to the row start/end indices.

    This function is typically used to obtain the indices of data rows
    organized in a ragged array. The result has one more element than
    ``rowsize``: element ``n`` is the start index of row ``n`` and element
    ``n + 1`` is its (exclusive) end index.

    Parameters
    ----------
    rowsize : list or np.ndarray or xr.DataArray
        The size of each row, in storage order.

    Returns
    -------
    np.ndarray
        Cumulative indices, starting at 0 and ending at the total number
        of elements. An empty ``rowsize`` yields ``array([0])``.

    Examples
    --------

    To obtain the indices within a ragged array of three consecutive rows of
    sizes 100, 202, and 53:

    >>> rowsize_to_index([100, 202, 53])
    array([  0, 100, 302, 355])
    """
    # asarray avoids a redundant copy; np.insert allocates a new array anyway.
    sizes = np.asarray(rowsize)
    if sizes.size == 0:
        # NumPy infers float64 for an empty sequence, which would produce a
        # float index array; indices must stay integers to be usable as
        # slice bounds and offsets.
        return np.zeros(1, dtype=int)
    # Prepend the leading 0 first so the cumulative sum yields both the start
    # of the first row and the end of the last one.
    return np.cumsum(np.insert(sizes, 0, 0))
class rowsize_to_index_tests(unittest.TestCase):
    """Check ``rowsize_to_index`` against each supported input container."""

    def test_rowsize_to_index(self):
        """Row sizes [2, 3, 4] must map to the indices [0, 2, 5, 9]."""
        sizes = [2, 3, 4]
        offsets = np.array([0, 2, 5, 9])
        # The same row sizes as a list, a NumPy array and an xarray DataArray,
        # checked in that order.
        candidates = (sizes, np.array(sizes), xr.DataArray(data=sizes))
        for candidate in candidates:
            matches = rowsize_to_index(candidate) == offsets
            self.assertTrue(np.all(matches))