Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sc 27 grid #56

Merged
merged 8 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Operations on image and labels layers.
im.combine
im.segment
im.segment_points
im.add_grid_labels_layer
im.expand_labels_layer
im.align_labels_layers
im.apply_labels_layers
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ install_requires =
seaborn>=0.12.2
leidenalg>=0.9.1
geopandas>=1.0.1
dask-geopandas
omegaconf==2.3.0
nptyping
magicgui
Expand All @@ -48,6 +49,7 @@ install_requires =
opencv-python
datasets
crick
spatialdata_io @ git+https://github.com/ArneDefauw/spatialdata-io.git@visium_hd
python_requires = >=3.8
include_package_data = True
package_dir =
Expand Down
13 changes: 13 additions & 0 deletions src/sparrow/_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

import pooch
import pyrootutils
import pytest
from hydra import compose, initialize
Expand Down Expand Up @@ -95,6 +96,18 @@ def sdata_transcripts_mul_coord(tmpdir):
yield sdata


@pytest.fixture
def sdata_bin():
    """Yield the Visium HD custom-binning unit-test dataset.

    The zipped zarr store is fetched through the dataset registry, unpacked,
    read with ``read_zarr`` and yielded with its ``path`` attribute set to
    ``None``.
    """
    archive_name = "transcriptomics/visium_hd/mouse/sdata_custom_binning_visium_hd_unit_test.zarr.zip"
    extracted_paths = get_registry().fetch(archive_name, processor=pooch.Unzip())

    # The longest common path of everything returned by the unzip step is used as
    # the store location (presumably the root of the unpacked .zarr directory).
    store_root = os.path.commonpath(extracted_paths)

    sdata = read_zarr(store_root)
    # NOTE(review): clearing `path` presumably makes the object behave as not backed by disk -- confirm.
    sdata.path = None

    yield sdata


@pytest.fixture
def sdata_blobs():
sdata = cluster_blobs(
Expand Down
8 changes: 8 additions & 0 deletions src/sparrow/_tests/test_datasets/test_transcriptomics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from spatialdata import SpatialData

from sparrow.datasets.transcriptomics import visium_hd_example


def test_visium_hd_example():
    """The Visium HD example loader (bin size 16) must return a ``SpatialData`` object."""
    loaded = visium_hd_example(bin_size=16)
    assert isinstance(loaded, SpatialData)
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from sparrow.utils._keys import _SPATIAL


@pytest.mark.skipif(not importlib.util.find_spec("flowsom"), reason="requires the flowSOM library")
def test_flowsom(sdata_blobs):
Expand Down Expand Up @@ -32,7 +34,7 @@ def test_flowsom(sdata_blobs):
assert (fsom.get_cell_data().var.index == channels).all()

# sanity check for consistency between flowsom object and sdata object.
coord = fsom.get_cell_data().obsm["spatial"][-2]
coord = fsom.get_cell_data().obsm[_SPATIAL][-2]
assert (
fsom.get_cell_data()[-2].to_df()["lineage_9"].values[0]
== sdata_blobs[img_layer].sel(c=["lineage_9"]).data[0, coord[0], coord[1]].compute()
Expand Down Expand Up @@ -64,7 +66,7 @@ def test_flowsom_multi_c(sdata_multi_c):
assert int(fraction * np.prod(sdata_multi_c[img_layer].shape[1:])) == fsom.get_cell_data().shape[0]

# sanity check for consistency between flowsom object and sdata object.
coord = fsom.get_cell_data().obsm["spatial"][-2]
coord = fsom.get_cell_data().obsm[_SPATIAL][-2]
assert (
fsom.get_cell_data()[-2].to_df()["0"].values[0]
== sdata_multi_c[img_layer].sel(c=[0]).data[0, coord[0], coord[1]].compute()
Expand Down
76 changes: 76 additions & 0 deletions src/sparrow/_tests/test_image/test_segmentation/test_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import numpy as np
import pytest
from spatialdata import SpatialData

from sparrow.image.segmentation._grid import add_grid_labels_layer


@pytest.mark.parametrize("hex_size", [5, 13, 24, 27, 30])
def test_add_grid_labels_layer_hexagon(hex_size):
    """Check that a hexagonal grid is added as consistent shapes and labels layers.

    Verifies that both output layers exist, that the labels layer has shape
    ``shape + offset``, that the non-zero labels equal the index of the shapes
    layer, and that the labelled region starts at the offset and reaches close
    to the far border of the array.
    """
    shapes_name = "hexagonal_shapes"
    labels_name = "hexagonal_labels"
    shape = (1100, 1000)
    offset = (80, 70)

    sdata = add_grid_labels_layer(
        SpatialData(),
        shape=shape,
        offset=offset,  # not recommended to add offset, better to add offset via a translation
        size=hex_size,
        output_shapes_layer=shapes_name,
        output_labels_layer=labels_name,
        grid_type="hexagon",
    )

    assert shapes_name in list(sdata.shapes)
    assert labels_name in list(sdata.labels)

    labels_element = sdata[labels_name]
    assert labels_element.shape == (shape[0] + offset[0], shape[1] + offset[1])

    array_labels = labels_element.data.compute()
    # Label 0 is excluded before comparing against the shapes index.
    nonzero_ids = np.unique(array_labels)
    nonzero_ids = nonzero_ids[nonzero_ids != 0]
    assert np.array_equal(nonzero_ids, np.array(sdata[shapes_name].index))

    # check that we fill the grid completely with hexagons.
    rows, cols = np.where(array_labels > 0)
    assert rows.min() == offset[0]
    assert cols.min() == offset[1]
    # The gap left at the far border may not exceed 2 * size along axis 0
    # and sqrt(3) * size along axis 1.
    assert (labels_element.shape[0] - rows.max()) <= 2 * hex_size
    assert (labels_element.shape[1] - cols.max()) <= np.sqrt(3) * hex_size


@pytest.mark.parametrize("square_size", [5, 13, 24, 27, 30])
def test_add_grid_labels_layer_square(square_size):
    """Check that a square grid is added as consistent shapes and labels layers.

    Verifies that both output layers exist, that the labels layer has shape
    ``shape + offset``, that the non-zero labels equal the index of the shapes
    layer, and that the labelled region starts at the offset, leaves a gap of
    at most one square at the far border, and has no unlabelled pixel in
    between.
    """
    sdata = SpatialData()

    # Layer names reflect the grid type under test (previously copy-pasted
    # "hexagonal_*" names from the hexagon test).
    output_shapes_layer = "square_shapes"
    output_labels_layer = "square_labels"
    shape = (1100, 1000)
    offset = (80, 70)

    sdata = add_grid_labels_layer(
        sdata,
        shape=shape,
        offset=offset,
        size=square_size,
        output_shapes_layer=output_shapes_layer,
        output_labels_layer=output_labels_layer,
        grid_type="square",
    )

    assert output_shapes_layer in [*sdata.shapes]
    assert output_labels_layer in [*sdata.labels]

    labels_shape = sdata[output_labels_layer].shape
    assert labels_shape == tuple(a + b for a, b in zip(shape, offset))
    array_labels = sdata[output_labels_layer].data.compute()
    # Label 0 is excluded before comparing against the shapes index.
    unique_labels = np.unique(array_labels)
    unique_labels = unique_labels[unique_labels != 0]
    assert np.array_equal(unique_labels, np.array(sdata[output_shapes_layer].index))

    # check that we fill the grid completely with squares.
    # Compute the coordinates of labelled pixels once and reuse them below.
    rows, cols = np.where(array_labels > 0)
    assert rows.min() == offset[0]
    assert cols.min() == offset[1]
    assert (labels_shape[0] - rows.max()) <= square_size
    assert (labels_shape[1] - cols.max()) <= square_size
    # Away from the last (possibly incomplete) square, every pixel must be labelled.
    assert (array_labels[offset[0] : -square_size, offset[1] : -square_size] != 0).all()
39 changes: 38 additions & 1 deletion src/sparrow/_tests/test_table/test_allocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import pytest
from spatialdata import SpatialData

from sparrow.table._allocation import allocate
from sparrow.table._allocation import allocate, bin_counts
from sparrow.utils._keys import _INSTANCE_KEY, _SPATIAL


def test_allocation(sdata_transcripts: SpatialData):
Expand Down Expand Up @@ -68,3 +69,39 @@ def test_allocation_overwrite(sdata_transcripts: SpatialData):
append=False,
overwrite=False,
)


def test_bin_counts(
    sdata_bin,
):
    """Binning the ``square_002um`` table with the custom grid must reproduce the stored table.

    The result of ``bin_counts`` is compared against the table
    ``table_custom_bin_32`` already present in the fixture: instance keys,
    variable names, the count matrix and the spatial coordinates must all match.
    """
    source_table = "square_002um"
    # custom grid to bin the counts of the source table, can be any segmentation mask.
    grid_labels = "square_labels_32"
    reference_table = "table_custom_bin_32"
    reproduced_table = f"{reference_table}_reproduce"

    # check that barcodes are unique in the source table of sdata_bin
    assert sdata_bin.tables[source_table].obs.index.is_unique

    sdata_bin = bin_counts(
        sdata_bin,
        table_layer=source_table,
        labels_layer=grid_labels,
        output_layer=reproduced_table,
        overwrite=True,
        append=False,
    )

    expected = sdata_bin[reference_table]
    actual = sdata_bin[reproduced_table]

    assert np.array_equal(expected.obs[_INSTANCE_KEY].values, actual.obs[_INSTANCE_KEY].values)

    assert np.array_equal(expected.var_names, actual.var_names)

    # `!=` on the two matrices yields a matrix whose stored entries mark the
    # differences; none may exist.
    differences = expected.X != actual.X
    assert differences.nnz == 0

    assert np.array_equal(expected.obsm[_SPATIAL], actual.obsm[_SPATIAL])
24 changes: 12 additions & 12 deletions src/sparrow/_tests/test_utils/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from skimage.measure import regionprops_table
from xrspatial import zonal_stats

from sparrow.utils._aggregate import Aggregator, _get_mask_area
from sparrow.utils._aggregate import RasterAggregator, _get_mask_area
from sparrow.utils._keys import _CELLSIZE_KEY


Expand All @@ -34,7 +34,7 @@ def test_aggregate_sum_dask_array():

float_dask_array = float_dask_array[None, None, ...]

aggregator = Aggregator(mask_dask_array=mask_dask_array, image_dask_array=float_dask_array)
aggregator = RasterAggregator(mask_dask_array=mask_dask_array, image_dask_array=float_dask_array)

df_sum = aggregator.aggregate_sum()

Expand All @@ -50,7 +50,7 @@ def test_aggregate_sum(sdata):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask.rechunk(512),
image_dask_array=image.rechunk(512),
)
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_aggregate_min_max(sdata):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask.rechunk(512),
image_dask_array=image.rechunk(512),
)
Expand Down Expand Up @@ -130,7 +130,7 @@ def test_aggregate_mean(sdata):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask.rechunk(512),
image_dask_array=image.rechunk(512),
)
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_aggregate_var(sdata):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask.rechunk(512),
image_dask_array=image.rechunk(512),
)
Expand Down Expand Up @@ -210,7 +210,7 @@ def test_aggregate_var_3D(sdata):
axis=1,
)

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask.rechunk((1, 512, 512)),
image_dask_array=image.rechunk((1, 1, 512, 512)),
)
Expand Down Expand Up @@ -275,7 +275,7 @@ def _calculate_intensity_mean(mask_block, image_block):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask,
image_dask_array=image,
)
Expand Down Expand Up @@ -303,7 +303,7 @@ def _calculate_centroid_weighted(mask_block, image_block):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask,
image_dask_array=image,
)
Expand Down Expand Up @@ -335,7 +335,7 @@ def _calculate_eccentricity(mask_block):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask,
image_dask_array=image,
)
Expand Down Expand Up @@ -364,7 +364,7 @@ def _calculate_intensity_mean_area(mask_block, image_block):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask,
image_dask_array=image,
)
Expand Down Expand Up @@ -395,7 +395,7 @@ def _calculate_intensity_mean_area(mask_block, image_block):
image = se_image.data[:, None, ...]
mask = se_labels.data[None, ...]

aggregator = Aggregator(
aggregator = RasterAggregator(
mask_dask_array=mask,
image_dask_array=image,
)
Expand Down
7 changes: 6 additions & 1 deletion src/sparrow/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from .cluster_blobs import cluster_blobs
from .pixie_example import pixie_example
from .proteomics import macsima_example, mibi_example
from .transcriptomics import resolve_example
from .transcriptomics import (
resolve_example,
resolve_example_multiple_coordinate_systems,
visium_hd_example,
visium_hd_example_custom_binning,
)
5 changes: 2 additions & 3 deletions src/sparrow/datasets/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,9 @@ def get_registry(path: str | Path | None = None) -> Pooch:
"transcriptomics/resolve/mouse/markerGeneListMartinNoLow.csv": "1ffefe7d4e72e05ef158ee1e73919b50882a97b6590f4ae977041d6b8b66a459",
"transcriptomics/resolve/mouse/sdata_transcriptomics.zarr.zip": "30a5649b8a463a623d4e573716f8c365df8c5eed3e77b3e81abf0acaf5ffd1f3",
"transcriptomics/resolve/mouse/sdata_transcriptomics_coordinate_systems_unit_test.zarr.zip": "ef2ba1c0f6cc9aebe4cf394d1ee00e0622ea4f9273fedd36feb9c7a2363e41a7",
"transcriptomics/visium_hd/mouse/Visium_HD_Mouse_Small_Intestine_tissue_image.btf": "924e0f472007ae3d5983a29fb802537dad9cfe914176edcaad4dfcbf265988a2",
"transcriptomics/visium_hd/mouse/masks.geojson": "a02377ce9924662b440fd7ab91da95e51344e82cda5f27d698ca820030fbfbf3",
"transcriptomics/visium_hd/mouse/binned_outputs/square_002um/spatial/tissue_positions.parquet": "e5744349d3b1d72d89c926aad099eda5d09bd59f3128da5c3562d50225161911",
"transcriptomics/visium_hd/mouse/binned_outputs/square_002um/filtered_feature_bc_matrix.h5": "7e5f205a05d6b985a8c365554f6d428ca7fd22065eb89cd277806231528ddaae",
"transcriptomics/visium_hd/mouse/sdata_custom_binning_visium_hd_unit_test.zarr.zip": "346597ca5c85a6ab81239e5b7dbcd11c7715f7a4208cd4912ac78738bd3ed092",
"transcriptomics/visium_hd/mouse/visium_hd_mouse_small_intestine.zip": "791938dc972d4b42b255673c08dcb3948ebb66c60eabd1483c2fdb67f001256b",
"proteomics/mibi_tof/sdata_multi_channel.zarr.zip": "930fd2574666b90d5d6660ad8b52d47afffc9522704b9e6fef39d11c9cfff06e",
"proteomics/macsima/sdata_multi_channel.zarr.zip": "26187fe62b75c3411f948cbcea52abd24b318992509e768c2ed2f55ddcb99f28",
},
Expand Down
Loading