Skip to content

Commit

Permalink
Add cucim.skimage.segmentation.expand_labels (#341)
Browse files Browse the repository at this point in the history
closes #334
This adds a simple function that can be used to expand a set of labels in a label image.

It should be reviewed after #318 is merged. The new commits only start from 6241bd2.

Authors:
  - Gregory Lee (https://github.com/grlee77)

Approvers:
  - https://github.com/jakirkham

URL: #341
  • Loading branch information
grlee77 authored Aug 1, 2022
1 parent 6f22f2e commit 5fee5cd
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 0 deletions.
2 changes: 2 additions & 0 deletions python/cucim/src/cucim/skimage/segmentation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._clear_border import clear_border
from ._expand_labels import expand_labels
from ._join import join_segmentations, relabel_sequential
from .boundaries import find_boundaries, mark_boundaries
from .morphsnakes import (checkerboard_level_set, disk_level_set,
Expand All @@ -7,6 +8,7 @@
from .random_walker_segmentation import random_walker

__all__ = [
"expand_labels",
"random_walker",
"find_boundaries",
"mark_boundaries",
Expand Down
96 changes: 96 additions & 0 deletions python/cucim/src/cucim/skimage/segmentation/_expand_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import cupy as cp

from cucim.core.operations.morphology import distance_transform_edt


def expand_labels(label_image, distance=1):
"""Expand labels in label image by ``distance`` pixels without overlapping.
Given a label image, ``expand_labels`` grows label regions (connected components)
outwards by up to ``distance`` pixels without overflowing into neighboring regions.
More specifically, each background pixel that is within Euclidean distance
of <= ``distance`` pixels of a connected component is assigned the label of that
connected component.
Where multiple connected components are within ``distance`` pixels of a background
pixel, the label value of the closest connected component will be assigned (see
Notes for the case of multiple labels at equal distance).
Parameters
----------
label_image : ndarray of dtype int
label image
distance : float
Euclidean distance in pixels by which to grow the labels. Default is one.
Returns
-------
enlarged_labels : ndarray of dtype int
Labeled array, where all connected regions have been enlarged
Notes
-----
Where labels are spaced more than ``distance`` pixels are apart, this is
equivalent to a morphological dilation with a disc or hyperball of radius ``distance``.
However, in contrast to a morphological dilation, ``expand_labels`` will
not expand a label region into a neighboring region.
This implementation of ``expand_labels`` is derived from CellProfiler [1]_, where
it is known as module "IdentifySecondaryObjects (Distance-N)" [2]_.
There is an important edge case when a pixel has the same distance to
multiple regions, as it is not defined which region expands into that
space. Here, the exact behavior depends on the upstream implementation
of ``scipy.ndimage.distance_transform_edt``.
See Also
--------
:func:`cucim.skimage.measure.label`, :func:`cucim.skimage.morphology.dilation` # noqa
References
----------
.. [1] https://cellprofiler.org
.. [2] https://github.com/CellProfiler/CellProfiler/blob/082930ea95add7b72243a4fa3d39ae5145995e9c/cellprofiler/modules/identifysecondaryobjects.py#L559 # noqa
Examples
--------
>>> labels = np.array([0, 1, 0, 0, 0, 0, 2])
>>> expand_labels(labels, distance=1)
array([1, 1, 1, 0, 0, 2, 2])
Labels will not overwrite each other:
>>> expand_labels(labels, distance=3)
array([1, 1, 1, 1, 2, 2, 2])
In case of ties, behavior is undefined, but currently resolves to the
label closest to ``(0,) * ndim`` in lexicographical order.
>>> labels_tied = np.array([0, 1, 0, 2, 0])
>>> expand_labels(labels_tied, 1)
array([1, 1, 1, 2, 2])
>>> labels2d = np.array(
... [[0, 1, 0, 0],
... [2, 0, 0, 0],
... [0, 3, 0, 0]]
... )
>>> expand_labels(labels2d, 1)
array([[2, 1, 1, 0],
[2, 2, 0, 0],
[2, 3, 3, 0]])
"""

distances, nearest_label_coords = distance_transform_edt(
label_image == 0, return_indices=True
)
labels_out = cp.zeros_like(label_image)
dilate_mask = distances <= distance
# build the coordinates to find nearest labels,
# in contrast to [1] this implementation supports label arrays
# of any dimension
masked_nearest_label_coords = [
dimension_indices[dilate_mask]
for dimension_indices in nearest_label_coords
]
nearest_labels = label_image[tuple(masked_nearest_label_coords)]
labels_out[dilate_mask] = nearest_labels
return labels_out
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
import cupy as cp
import pytest
from cupy.testing import assert_array_equal

from cucim.core.operations.morphology import distance_transform_edt
from cucim.skimage import data, measure
from cucim.skimage.segmentation import expand_labels

SAMPLE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])
SAMPLE1D_EXPANDED_3 = cp.array(
[4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
)

# Some pixels are important edge cases with undefined behaviour:
# these are the pixels that are at the same distance from
# multiple labels. Ideally the label would be chosen at random
# to avoid bias, but as we are relying on the index map returned
# by the scipy.ndimage distance transform, what actually happens
# is determined by the upstream implementation of the distance
# tansform, thus we don't give any guarantees for the edge case pixels.
#
# Regardless, it seems prudent to have a test including an edge case
# so we can detect whether future upstream changes in scipy.ndimage
# modify the behaviour.

EDGECASE1D = cp.array([0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])
EDGECASE1D_EXPANDED_3 = cp.array(
[4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
)

SAMPLE2D = cp.array(
[
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
)

SAMPLE2D_EXPANDED_3 = cp.array(
[
[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 0],
[1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2],
[0, 0, 1, 0, 0, 0, 0, 2, 2, 2, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0],
]
)

# non-integer expansion
SAMPLE2D_EXPANDED_1_5 = cp.array(
[
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
[1, 1, 1, 1, 1, 0, 0, 0, 2, 2, 2],
[1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2],
[0, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
]
)


EDGECASE2D = cp.array(
[
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0],
[0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0],
[0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
]
)

EDGECASE2D_EXPANDED_4 = cp.array(
[
[1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 0],
[1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0],
[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0],
]
)

SAMPLE3D = cp.array(
[
[
[0, 0, 0, 0],
[0, 3, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
],

[
[0, 0, 0, 0],
[0, 3, 3, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
],

[
[0, 0, 0, 0],
[0, 3, 0, 0],
[0, 0, 0, 0],
[0, 0, 5, 0],
],

[
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 5, 0],
]
]
)

SAMPLE3D_EXPANDED_2 = cp.array(
[
[
[3, 3, 3, 3],
[3, 3, 3, 3],
[3, 3, 3, 3],
[0, 3, 5, 0],
],

[
[3, 3, 3, 3],
[3, 3, 3, 3],
[3, 3, 3, 3],
[0, 5, 5, 5],
],

[
[3, 3, 3, 3],
[3, 3, 3, 3],
[3, 3, 5, 5],
[5, 5, 5, 5],
],

[
[3, 3, 3, 0],
[3, 3, 3, 0],
[3, 3, 5, 5],
[5, 5, 5, 5],
]
]
)

SAMPLE_EDGECASE_BEHAVIOUR = cp.array(
[[0, 1, 0, 0], [2, 0, 0, 0], [0, 3, 0, 0]]
)


@pytest.mark.parametrize(
"input_array, expected_output, expand_distance",
[
(SAMPLE1D, SAMPLE1D_EXPANDED_3, 3),
(SAMPLE2D, SAMPLE2D_EXPANDED_3, 3),
(SAMPLE2D, SAMPLE2D_EXPANDED_1_5, 1.5),
(EDGECASE1D, EDGECASE1D_EXPANDED_3, 3),
(EDGECASE2D, EDGECASE2D_EXPANDED_4, 4),
(SAMPLE3D, SAMPLE3D_EXPANDED_2, 2)
]
)
def test_expand_labels(input_array, expected_output, expand_distance):
if input_array.ndim == 1:
with pytest.raises(NotImplementedError):
expand_labels(input_array, expand_distance)
else:
expanded = expand_labels(input_array, expand_distance)
assert_array_equal(expanded, expected_output)


@pytest.mark.parametrize('ndim', [2, 3])
@pytest.mark.parametrize('distance', range(6))
def test_binary_blobs(ndim, distance):
"""Check some invariants with label expansion.
- New labels array should exactly contain the original labels array.
- Distance to old labels array within new labels should never exceed input
distance.
- Distance beyond the expanded labels should always exceed the input
distance.
"""
img = data.binary_blobs(length=64, blob_size_fraction=0.05, n_dim=ndim)
labels = measure.label(img)
expanded = expand_labels(labels, distance=distance)
original_mask = labels != 0
assert_array_equal(labels[original_mask], expanded[original_mask])
expanded_only_mask = (expanded - labels).astype(bool)
distance_map = distance_transform_edt(~original_mask)
expanded_distances = distance_map[expanded_only_mask]
if expanded_distances.size > 0:
assert cp.all(expanded_distances <= distance)
beyond_expanded_distances = distance_map[~expanded.astype(bool)]
if beyond_expanded_distances.size > 0:
assert cp.all(beyond_expanded_distances > distance)


def test_edge_case_behaviour():
""" Check edge case behavior to detect upstream changes
For edge cases where a pixel has the same distance to several regions,
lexicographical order seems to determine which region gets to expand
into this pixel given the current upstream behaviour in
scipy.ndimage.distance_map_edt.
As a result, we expect different results when transposing the array.
If this test fails, something has changed upstream.
"""
expanded = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR, 1)
expanded_transpose = expand_labels(SAMPLE_EDGECASE_BEHAVIOUR.T, 1)
assert not cp.all(expanded == expanded_transpose.T)

0 comments on commit 5fee5cd

Please sign in to comment.