Skip to content

Commit

Permalink
Eoepca test suite (#297)
Browse files Browse the repository at this point in the history
* array concat handle axis

* array append handle axis

* fix array apply cases

* update for tests

* array test update

* fix array apply test
  • Loading branch information
ValentinaHutter authored Nov 25, 2024
1 parent eb3bdb1 commit 010c214
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 70 deletions.
39 changes: 33 additions & 6 deletions openeo_processes_dask/process_implementations/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
]


def get_labels(data, dimension="labels", axis=0):
def get_labels(data, dimension="labels", axis=0, dim_labels=None):
if isinstance(data, xr.DataArray):
dimension = data.dims[0] if len(data.dims) == 1 else dimension
if axis:
Expand All @@ -61,6 +61,8 @@ def get_labels(data, dimension="labels", axis=0):
labels = []
if isinstance(data, list):
data = np.asarray(data)
if not isinstance(dim_labels, type(None)):
labels = dim_labels
return labels, data


Expand All @@ -82,9 +84,7 @@ def array_element(
raise ArrayElementParameterConflict(
"The process `array_element` only allows that either the `index` or the `labels` parameter is set."
)

if isinstance(data, xr.DataArray):
dim_labels, data = get_labels(data, axis=axis)
dim_labels, data = get_labels(data, axis=axis, dim_labels=dim_labels)

if label is not None:
if len(dim_labels) == 0:
Expand Down Expand Up @@ -189,7 +189,7 @@ def modify(data):
return modified


def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike:
def array_concat(array1: ArrayLike, array2: ArrayLike, axis=None) -> ArrayLike:
labels1, array1 = get_labels(array1)
labels2, array2 = get_labels(array2)

Expand All @@ -198,7 +198,21 @@ def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike:
"At least one label exists in both arrays and the conflict must be resolved before."
)

concat = np.concatenate([array1, array2])
if (len(array1.shape) - len(array2.shape)) == 1:
if axis is None:
s1 = np.array(list(array1.shape))
s2 = list(array2.shape)
s2.append(0)
s2 = np.array(s2)

axis = np.argmax(s1 != s2)

array2 = np.expand_dims(array2, axis=axis)

if axis:
concat = np.concatenate([array1, array2], axis=axis)
else:
concat = np.concatenate([array1, array2])

# e.g. concatenating int32 and str arrays results in the result being cast to a Unicode dtype of a certain length (e.g. <U22).
# There isn't really anything better to do as numpy does not support heterogeneous arrays.
Expand All @@ -219,7 +233,20 @@ def array_append(
value: Any,
label: Optional[Any] = None,
dim_labels=None,
axis=None,
) -> ArrayLike:
if axis:
if isinstance(value, list) and len(value) == 1:
value = value[0]
if (isinstance(value, np.ndarray) or isinstance(value, da.core.Array)) and len(
value.flatten()
) == 1:
value = value.flatten()[0]

value = np.take(np.ones_like(data), indices=0, axis=axis) * value
concat = array_concat(data, value, axis=axis)
return concat

if dim_labels:
data = array_create_labeled(data=data, labels=dim_labels)
if label is not None:
Expand Down
39 changes: 31 additions & 8 deletions openeo_processes_dask/process_implementations/cubes/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,45 @@ def apply_dimension(
exclude_dims={dimension},
)

reordered_result = result.transpose(*data.dims, ...).rename(
{dimension: target_dimension}
)
reordered_result = result.transpose(*data.dims, ...)

if len(reordered_result[target_dimension]) == 1:
reordered_result[target_dimension] = ["0"]
if dimension in reordered_result.dims:
result_len = len(reordered_result[dimension])
else:
result_len = 1

# Case 1: target_dimension is not defined/ is source dimension
if dimension == target_dimension:
# dimension labels preserved
# if the number of source dimension's values is equal to the number of computed values
if len(reordered_data[dimension]) == result_len:
reordered_result[dimension] == reordered_data[dimension].values
else:
reordered_result[dimension] = np.arange(result_len)
elif target_dimension in reordered_result.dims:
# source dimension is not target dimension
# target dimension exists with a single label only
if len(reordered_result[target_dimension]) == 1:
reordered_result = reordered_result.drop_vars(target_dimension).squeeze(
target_dimension
)
reordered_result = reordered_result.rename({dimension: target_dimension})
reordered_result[dimension] = np.arange(result_len)
else:
raise Exception(
f"Cannot rename dimension {dimension} to {target_dimension} as {target_dimension} already exists in dataset and contains more than one label: {reordered_result[target_dimension]}. See process definition. "
)
else:
# source dimension is not the target dimension and the latter does not exist
reordered_result = reordered_result.rename({dimension: target_dimension})
reordered_result[target_dimension] = np.arange(result_len)

if data.rio.crs is not None:
try:
reordered_result.rio.write_crs(data.rio.crs, inplace=True)
except ValueError:
pass

if is_new_dim_added:
reordered_result.openeo.add_dim_type(name=target_dimension, type="other")

return reordered_result


Expand Down
57 changes: 1 addition & 56 deletions tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_apply(temporal_interval, bounding_box, random_raster_data, process_regi

@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_case_1(
def test_apply_dimension_add(
temporal_interval, bounding_box, random_raster_data, process_registry
):
input_cube = create_fake_rastercube(
Expand Down Expand Up @@ -78,61 +78,6 @@ def test_apply_dimension_case_1(
)


@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_target_dimension(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Verify `apply_dimension` renaming behaviour when `target_dimension` is set.

    Covers two cases: the target dimension is a brand-new name, and the target
    dimension collides with an existing (reduced-away) dimension of the cube.
    """
    input_cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    # Wrap the registered "mean" reducer so `apply_dimension` can inject the
    # per-dimension data via the `data` parameter reference.
    _process = partial(
        process_registry["mean"].implementation,
        data=ParameterReference(from_parameter="data"),
    )

    # Case 1: reduce along "x"; the result is stored in the new dimension
    # "target", which replaces the source dimension "x".
    output_cube_reduced = apply_dimension(
        data=input_cube, process=_process, dimension="x", target_dimension="target"
    )

    # Mean over "x" collapses that dimension; `expand_dims` re-adds the
    # single-label "target" dimension expected in the output.
    expected_output = (input_cube.mean(dim="x")).expand_dims("target")

    general_output_checks(
        input_cube=input_cube,
        output_cube=output_cube_reduced,
        verify_attrs=True,
        verify_crs=False,
        expected_results=expected_output,
    )

    # Case 2: reduce along "x" with target_dimension set to "y", a dimension
    # that already exists on the cube — the result dimension takes over the
    # name "y" (the original "y" coordinate is dropped).
    output_cube_reduced = apply_dimension(
        data=input_cube, process=_process, dimension="x", target_dimension="y"
    )
    expected_output = (
        input_cube.mean(dim="x")
        .expand_dims("target")
        .drop_vars("y")
        .rename({"target": "y"})
    )

    general_output_checks(
        input_cube=input_cube,
        output_cube=output_cube_reduced,
        verify_attrs=True,
        verify_crs=False,
        expected_results=expected_output,
    )

    # The renamed target dimension should be registered as a generic
    # (type "other") dimension on the openeo accessor.
    assert "y" in output_cube_reduced.openeo.other_dims


@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_ordering_processes(
Expand Down

0 comments on commit 010c214

Please sign in to comment.