From 27a1544ef4a62d15184fbb44d290a97baf850a12 Mon Sep 17 00:00:00 2001
From: Gregory Lee
Date: Sat, 27 Apr 2024 11:11:23 -0400
Subject: [PATCH] add missing ensure_spacing tests

* was previously only being tested indirectly via tests for peak_local_max
---
 .../cucim/src/cucim/skimage/_shared/coord.py  |  2 +
 .../cucim/skimage/_shared/tests/test_coord.py | 98 +++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 python/cucim/src/cucim/skimage/_shared/tests/test_coord.py

diff --git a/python/cucim/src/cucim/skimage/_shared/coord.py b/python/cucim/src/cucim/skimage/_shared/coord.py
index 9d5e45a7a..5ee981bd8 100644
--- a/python/cucim/src/cucim/skimage/_shared/coord.py
+++ b/python/cucim/src/cucim/skimage/_shared/coord.py
@@ -106,6 +106,8 @@ def ensure_spacing(
     if len(coords):
         coords = cp.atleast_2d(coords)
         coords = cp.asnumpy(coords)
+        if not np.isscalar(spacing):
+            spacing = cp.asnumpy(spacing)
         if min_split_size is None:
             batch_list = [coords]
         else:
diff --git a/python/cucim/src/cucim/skimage/_shared/tests/test_coord.py b/python/cucim/src/cucim/skimage/_shared/tests/test_coord.py
new file mode 100644
index 000000000..2a8353789
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/_shared/tests/test_coord.py
@@ -0,0 +1,98 @@
+import time
+
+import cupy as cp
+import numpy as np
+import pytest
+from scipy.spatial.distance import minkowski, pdist
+
+from cucim.skimage._shared.coord import ensure_spacing
+
+
+@pytest.mark.parametrize("p", [1, 2, np.inf])
+@pytest.mark.parametrize("size", [30, 50, None])
+def test_ensure_spacing_trivial(p, size):
+    # --- Empty input
+    assert ensure_spacing(cp.asarray([]), p_norm=p).size == 0
+
+    # --- A unique point
+    coord = cp.random.randn(1, 2)
+    assert cp.array_equal(
+        coord, ensure_spacing(coord, p_norm=p, min_split_size=size)
+    )
+
+    # --- Verified spacing
+    coord = cp.random.randn(100, 2)
+
+    # --- 0 spacing
+    assert cp.array_equal(
+        coord, ensure_spacing(coord, spacing=0, p_norm=p, min_split_size=size)
+    )
+
+    # Spacing is chosen to be half the minimum distance
+    coord_cpu = cp.asnumpy(coord)
+    spacing = cp.asarray(pdist(coord_cpu, metric=minkowski, p=p).min() * 0.5)
+
+    out = ensure_spacing(coord, spacing=spacing, p_norm=p, min_split_size=size)
+
+    assert cp.array_equal(coord, out)
+
+
+@pytest.mark.parametrize("ndim", [1, 2, 3, 4, 5])
+@pytest.mark.parametrize("size", [2, 10, None])
+def test_ensure_spacing_nD(ndim, size):
+    coord = cp.ones((5, ndim))
+
+    expected = cp.ones((1, ndim))
+
+    assert cp.array_equal(ensure_spacing(coord, min_split_size=size), expected)
+
+
+@pytest.mark.parametrize("p", [1, 2, np.inf])
+@pytest.mark.parametrize("size", [50, 100, None])
+def test_ensure_spacing_batch_processing(p, size):
+    coord_cpu = np.random.randn(100, 2)
+
+    # --- Use the median pairwise distance between the points as spacing
+    spacing = cp.asarray(np.median(pdist(coord_cpu, metric=minkowski, p=p)))
+    coord = cp.asarray(coord_cpu)
+
+    expected = ensure_spacing(coord, spacing=spacing, p_norm=p)
+
+    cp.testing.assert_array_equal(
+        ensure_spacing(coord, spacing=spacing, p_norm=p, min_split_size=size),
+        expected,
+    )
+
+
+def test_max_batch_size():
+    """Small batches are slow, large batches -> large allocations -> also slow.
+
+    https://github.com/scikit-image/scikit-image/pull/6035#discussion_r751518691
+    """
+    coords = cp.random.randint(low=0, high=1848, size=(40000, 2))
+    tstart = time.perf_counter()
+    ensure_spacing(coords, spacing=100, min_split_size=50, max_split_size=2000)
+    dur1 = time.perf_counter() - tstart
+
+    tstart = time.perf_counter()
+    ensure_spacing(coords, spacing=100, min_split_size=50, max_split_size=20000)
+    dur2 = time.perf_counter() - tstart
+
+    # Originally checked dur1 < dur2 to assert that the default batch size was
+    # faster than a much larger batch size. However, on rare occasion a CI test
+    # case would fail with dur1 ~5% larger than dur2. To be more robust to
+    # variable load or differences across architectures, we relax this here.
+    assert dur1 < 1.33 * dur2
+
+
+@pytest.mark.parametrize("p", [1, 2, np.inf])
+@pytest.mark.parametrize("size", [30, 50, None])
+def test_ensure_spacing_p_norm(p, size):
+    coord_cpu = np.random.randn(100, 2)
+
+    # --- Use the median pairwise distance between the points as spacing
+    spacing = cp.asarray(np.median(pdist(coord_cpu, metric=minkowski, p=p)))
+    coord = cp.asarray(coord_cpu)
+    out = ensure_spacing(coord, spacing=spacing, p_norm=p, min_split_size=size)
+
+    assert pdist(cp.asnumpy(out), metric=minkowski, p=p).min() > spacing