Distance table generation without segmentation masks (#1159)
* use cell table centroids to get distances

* adjust tests

* remove torch dependency

* fix spatial enrichment tests
camisowers authored Oct 4, 2024
1 parent 301e344 commit cb8ccdb
Showing 7 changed files with 65 additions and 54 deletions.
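In short, `calc_dist_matrix` now takes the cell table itself instead of a directory of segmentation masks and reads the cell centroids directly from it. A minimal before/after sketch of the call, assuming the directory layout and column names used in the notebooks below ('fov', 'label', 'centroid-0', 'centroid-1'); the paths are illustrative:

import pandas as pd

from ark.analysis import spatial_analysis_utils

# old call (removed in this commit): centroids were extracted from label images
# spatial_analysis_utils.calc_dist_matrix(segmentation_dir, dist_mat_dir)

# new call: centroids come straight from the cell table columns
cell_table = pd.read_csv("segmentation/cell_table/cell_table_size_normalized_cell_labels.csv")
spatial_analysis_utils.calc_dist_matrix(
    cell_table, "spatial_analysis/dist_mats",  # output directory must already exist
    fov_id="fov", label_id="label", centroid_ids=("centroid-0", "centroid-1"),
)

The per-FOV distance matrices are computed with the same cdist call as before; only the source of the centroids changes.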
1 change: 0 additions & 1 deletion pyproject.toml
@@ -38,7 +38,6 @@ dependencies = [
"statsmodels>=0.13.2,<1",
"tifffile>=2022",
"torchdata",
"torch==2.2.0",
"tqdm>=4,<5",
"umap-learn>=0.5,<1.0",
"xarray>=2022",
49 changes: 21 additions & 28 deletions src/ark/analysis/spatial_analysis_utils.py
@@ -3,10 +3,9 @@
import numpy as np
import pandas as pd
import scipy
import skimage.measure
import sklearn.metrics
import xarray as xr
from alpineer import io_utils, load_utils, misc_utils
from alpineer import io_utils, misc_utils
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
from statsmodels.stats.multitest import multipletests
@@ -16,56 +15,50 @@
from ark.utils._bootstrapping import compute_close_num_rand


def calc_dist_matrix(label_dir, save_path, prefix='_whole_cell'):
def calc_dist_matrix(cell_table, save_path, fov_id=settings.FOV_ID, label_id=settings.CELL_LABEL,
centroid_ids=(settings.CENTROID_0, settings.CENTROID_1)):
"""Generate matrix of distances between center of pairs of cells.
Saves each one individually to `save_path`.
Args:
label_dir (str):
path to segmentation masks indexed by `(fov, cell_id, cell_id, label)`
cell_table (pd.DataFrame):
data frame with fov, label, and centroid information
save_path (str):
path to save the distance matrices
prefix (str):
the prefix used to identify label map files in `label_dir`
fov_id (str):
the column name containing the fov
label_id (str):
the column name containing the cell label
centroid_ids (tuple):
the columns identifying the centroids of each cell
"""

# check that both label_dir and save_path exist
io_utils.validate_paths([label_dir, save_path])
io_utils.validate_paths([save_path])

# load all the file names in label_dir
fov_files = io_utils.list_files(label_dir, substrs=prefix + '.tiff')
fovs = cell_table[fov_id].unique()

# iterate for each fov
with tqdm(total=len(fov_files), desc="Distance Matrix Generation", unit="FOVs") \
with tqdm(total=len(fovs), desc="Distance Matrix Generation", unit="FOVs") \
as dist_mat_progress:
for fov_file in fov_files:
dist_mat_progress.set_postfix(FOV=fov_file)
for fov in fovs:
dist_mat_progress.set_postfix(FOV=fov)

# retrieve the fov name
fov_name = fov_file.replace(prefix + '.tiff', '')
fov_table = cell_table[cell_table[fov_id] == fov]

# load in the data
fov_data = load_utils.load_imgs_from_dir(
label_dir, [fov_file], match_substring=prefix,
trim_suffix=prefix, xr_channel_names=['label']
)

# keep just the middle two dimensions
fov_data = fov_data.loc[fov_name, :, :, 'label'].values

# extract region properties of label map, then just get centroids
props = skimage.measure.regionprops(fov_data)
centroids = [prop.centroid for prop in props]
centroid_labels = [prop.label for prop in props]
# get centroid and label info
centroids = [(row[centroid_ids[0]], row[centroid_ids[1]]) for indx, row in fov_table.iterrows()]
centroid_labels = list(fov_table[label_id])

# generate the distance matrix, then assign centroid_labels as coords
dist_matrix = cdist(centroids, centroids).astype(np.float32)
dist_mat_xarr = xr.DataArray(dist_matrix, coords=[centroid_labels, centroid_labels])

# save the distance matrix to save_path
dist_mat_xarr.to_netcdf(
os.path.join(save_path, fov_name + '_dist_mat.xr'),
os.path.join(save_path, fov + '_dist_mat.xr'),
format='NETCDF3_64BIT'
)
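
Each per-FOV matrix is still written as an xarray DataArray whose coordinates on both axes are the cell labels, saved as `<fov>_dist_mat.xr`. A quick sketch of reading one back and looking up a pairwise distance (the directory, FOV name, and labels are illustrative):

import os

import xarray as xr

save_path = "spatial_analysis/dist_mats"
dist_mat = xr.load_dataarray(os.path.join(save_path, "fov8_dist_mat.xr"))

# rows and columns are indexed by cell label, so distances can be looked up by label pair
distance_1_to_3 = dist_mat.loc[1, 3].item()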

7 changes: 4 additions & 3 deletions templates/Calculate_Mixing_Scores.ipynb
@@ -110,7 +110,6 @@
"outputs": [],
"source": [
"cell_table_path = os.path.join(base_dir, \"segmentation/cell_table/cell_table_size_normalized_cell_labels.csv\")\n",
"segmentation_dir = os.path.join(base_dir, \"segmentation/deepcell_output\")\n",
"\n",
"spatial_analysis_dir = os.path.join(base_dir, \"spatial_analysis\")\n",
"dist_mat_dir = os.path.join(spatial_analysis_dir, \"dist_mats\")\n",
@@ -134,7 +133,9 @@
"# create the dist_mat_dir directory if it doesn't exist\n",
"if not os.path.exists(dist_mat_dir):\n",
" os.makedirs(dist_mat_dir)\n",
" spatial_analysis_utils.calc_dist_matrix(segmentation_dir, dist_mat_dir)\n",
" cell_table = pd.read_csv(cell_table_path)\n",
" spatial_analysis_utils.calc_dist_matrix(cell_table, dist_mat_dir, fov_id='fov', label_id='label', \n",
" centroid_ids=('centroid-0', 'centroid-1'))\n",
" \n",
"# create the neighbors_mat_dir directory if it doesn't exist\n",
"if not os.path.exists(neighbors_mat_dir):\n",
@@ -443,7 +444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.14"
},
"vscode": {
"interpreter": {
26 changes: 15 additions & 11 deletions templates/cell_neighbors_analysis.ipynb
@@ -95,7 +95,6 @@
"outputs": [],
"source": [
"spatial_analysis_dir = os.path.join(base_dir, \"spatial_analysis\")\n",
"segmentation_dir = os.path.join(base_dir, \"segmentation/deepcell_output\")\n",
"cell_table_path = os.path.join(base_dir, \"segmentation/cell_table/cell_table_size_normalized_cell_labels.csv\")\n",
"dist_mat_dir = os.path.join(spatial_analysis_dir, \"dist_mats\")\n",
"\n",
@@ -117,7 +116,9 @@
"# generate distance matrices if needed\n",
"if not os.path.exists(dist_mat_dir):\n",
" os.makedirs(dist_mat_dir)\n",
" spatial_analysis_utils.calc_dist_matrix(segmentation_dir, dist_mat_dir)\n",
" cell_table = pd.read_csv(cell_table_path)\n",
" spatial_analysis_utils.calc_dist_matrix(cell_table, dist_mat_dir, fov_id='fov', label_id='label', \n",
" centroid_ids=('centroid-0', 'centroid-1'))\n",
"\n",
"# create neighbors matrix and output directories\n",
"for directory in [neighbors_mat_dir, output_dir]:\n",
@@ -159,7 +160,10 @@
"outputs": [],
"source": [
"pixel_radius = 50\n",
"cell_type_col = settings.CELL_TYPE"
"cell_type_col = settings.CELL_TYPE\n",
"\n",
"counts_path = os.path.join(neighbors_mat_dir, f\"neighborhood_counts-{cell_type_col}_radius{pixel_radius}.csv\")\n",
"freqs_path = os.path.join(neighbors_mat_dir, f\"neighborhood_freqs-{cell_type_col}_radius{pixel_radius}.csv\")"
]
},
{
@@ -173,17 +177,17 @@
},
"outputs": [],
"source": [
"freqs_path = os.path.join(neighbors_mat_dir, f\"neighborhood_freqs-{cell_type_col}_radius{pixel_radius}.csv\")\n",
" \n",
"# Check for existing neighbors matrix and if not, create a new one\n",
"# Check for existing neighbors matrix\n",
"if not os.path.exists(freqs_path):\n",
" print(f\"Generating neighbors matrix for {cell_type_col}.\")\n",
" \n",
" all_data = pd.read_csv(cell_table_path)\n",
" _, neighbor_freqs = create_neighborhood_matrix(\n",
" all_data, dist_mat_dir, distlim=pixel_radius, cell_type_col=cell_type_col)\n",
" \n",
" # Save neighbors frequency matrix\n",
" # Create new matrix with the radius and cell column specified above\n",
" neighbor_counts, neighbor_freqs = create_neighborhood_matrix(\n",
" all_data, dist_mat_dir, distlim=pixel_radius, cell_type_col=cell_type_col)\n",
"\n",
" # Save neighbor matrices\n",
" neighbor_counts.to_csv(counts_path, index=False)\n",
" neighbor_freqs.to_csv(freqs_path, index=False)"
]
},
@@ -286,7 +290,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.10.14"
}
},
"nbformat": 4,
5 changes: 3 additions & 2 deletions templates/example_neighborhood_analysis_script.ipynb
@@ -165,7 +165,8 @@
"# create the dist_mat_output directory if it doesn't exist\n",
"if not os.path.exists(dist_mat_dir):\n",
" os.makedirs(dist_mat_dir)\n",
" spatial_analysis_utils.calc_dist_matrix(segmentation_dir, dist_mat_dir)\n",
" spatial_analysis_utils.calc_dist_matrix(all_data, dist_mat_dir, fov_id='fov', label_id='label', \n",
" centroid_ids=('centroid-0', 'centroid-1')) \n",
" \n",
"# verify all the FOVs computed exist in all_data\n",
"misc_utils.verify_in_list(\n",
@@ -740,7 +741,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.10.14"
},
"vscode": {
"interpreter": {
15 changes: 8 additions & 7 deletions tests/analysis/spatial_analysis_utils_test.py
@@ -6,7 +6,6 @@
import pytest
import test_utils
import xarray as xr
from alpineer.test_utils import _write_labels

import ark.settings as settings
from ark.analysis import spatial_analysis_utils
@@ -21,14 +20,16 @@ def test_calc_dist_matrix():
os.mkdir(save_path)

# generate sample label data
# NOTE: this function should support varying FOV sizes
_write_labels(label_dir, ["fov8"], ["label"], (10, 10),
'', True, np.uint8, suffix='_whole_cell')
_write_labels(label_dir, ["fov9"], ["label"], (5, 5),
'', True, np.uint8, suffix='_whole_cell')
cell_table = pd.DataFrame(
{
settings.FOV_ID: ['fov8'] * 4 + ['fov9'] * 4,
settings.CELL_LABEL: [1, 2, 3, 4] * 2,
settings.CENTROID_0: [1.5, 1.5, 7.5, 7.5, 0.5, 0.5, 3.5, 3.5],
settings.CENTROID_1: [1.5, 7.5, 1.5, 7.5, 0.5, 3.5, 0.5, 3.5],
})

# generate the distance matrices
spatial_analysis_utils.calc_dist_matrix(label_dir, save_path)
spatial_analysis_utils.calc_dist_matrix(cell_table, save_path)

# assert the fov8 and fov9 .xr files exist
assert os.path.exists(os.path.join(save_path, 'fov8_dist_mat.xr'))
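
As a sanity check, the fov8 centroids above sit at the corners of a 6 x 6 square, so the expected pairwise distances are easy to verify by hand. A sketch of that check (not an assertion taken from the test file itself):

import numpy as np
from scipy.spatial.distance import cdist

fov8_centroids = np.array([[1.5, 1.5], [1.5, 7.5], [7.5, 1.5], [7.5, 7.5]])
expected = cdist(fov8_centroids, fov8_centroids)

# adjacent corners are 6.0 apart, opposite corners 6 * sqrt(2) ~= 8.49
assert np.isclose(expected[0, 1], 6.0)
assert np.isclose(expected[0, 3], 6.0 * np.sqrt(2))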
16 changes: 14 additions & 2 deletions tests/analysis/spatial_enrichment_test.py
@@ -44,10 +44,12 @@ def test_generate_channel_spatial_enrichment_stats():
_write_labels(label_dir, ["fov8", "fov9"], ["label"], (10, 10),
'', True, np.uint8, suffix='_whole_cell')

spatial_analysis_utils.calc_dist_matrix(label_dir, dist_mat_dir)
label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_whole_cell",
xr_channel_names=["label"])
all_data = test_utils.spoof_cell_table_from_labels(label_maps)
all_data[settings.CENTROID_0] = [1.5, 1.5, 7.5, 7.5, 1.5, 1.5, 7.5, 7.5]
all_data[settings.CENTROID_1] = [1.5, 7.5, 1.5, 7.5, 1.5, 7.5, 1.5, 7.5]
spatial_analysis_utils.calc_dist_matrix(all_data, dist_mat_dir)

vals_pos, stats_pos = \
spatial_enrichment.generate_channel_spatial_enrichment_stats(
@@ -81,10 +83,20 @@ def test_generate_cluster_spatial_enrichment_stats():
_write_labels(label_dir, ["fov8", "fov9"], ["label"], (10, 10),
'', True, np.uint8, suffix='_whole_cell')

spatial_analysis_utils.calc_dist_matrix(label_dir, dist_mat_dir)
cell_table = pd.DataFrame(
{
settings.FOV_ID: ['fov8'] * 4 + ['fov9'] * 4,
settings.CELL_LABEL: [1, 2, 3, 4] * 2,
settings.CENTROID_0: [1.5, 1.5, 7.5, 7.5, 1.5, 1.5, 7.5, 7.5],
settings.CENTROID_1: [1.5, 7.5, 1.5, 7.5, 1.5, 7.5, 1.5, 7.5],
})

label_maps = load_utils.load_imgs_from_dir(label_dir, trim_suffix="_whole_cell",
xr_channel_names=["label"])
all_data = test_utils.spoof_cell_table_from_labels(label_maps)
all_data[settings.CENTROID_0] = [1.5, 1.5, 7.5, 7.5, 1.5, 1.5, 7.5, 7.5]
all_data[settings.CENTROID_1] = [1.5, 7.5, 1.5, 7.5, 1.5, 7.5, 1.5, 7.5]
spatial_analysis_utils.calc_dist_matrix(all_data, dist_mat_dir)

vals_pos, stats_pos = \
spatial_enrichment.generate_cluster_spatial_enrichment_stats(
