Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NickAkhmetov/HMP-159 Upgrade vitessce-python dependency #81

Merged
merged 27 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
d89851b
Upgrade vitessce-python dependency
NickAkhmetov Jul 18, 2023
7b15721
replace deprecated view types with new ones
NickAkhmetov Jul 19, 2023
458f8e7
update changed names
NickAkhmetov Aug 3, 2023
5a6c25c
CODEX Cytokit + SPRM config works
NickAkhmetov Aug 4, 2023
32611d4
snATAC-seq config works
NickAkhmetov Aug 4, 2023
e50b202
snRNA-seq Salmon works
NickAkhmetov Aug 4, 2023
574645b
MIBI DeepCell+SPRM works
NickAkhmetov Aug 7, 2023
56175b0
snRNA-seq (SNARE-seq2) [Salmon] works
NickAkhmetov Aug 8, 2023
60998fc
handle coordination values for cases with multiple obsLabels, handle …
NickAkhmetov Aug 8, 2023
a7a021c
autopep8 fixes
NickAkhmetov Aug 9, 2023
0912a8b
more lint fixes (mostly line length and imports)
NickAkhmetov Aug 9, 2023
374ab32
trailing whitespace fixes
NickAkhmetov Aug 9, 2023
9a94352
fix `replace_url_in_file` test
NickAkhmetov Aug 9, 2023
2fcd89f
update RNASeqViewConfBuilder test conf
NickAkhmetov Aug 9, 2023
f63ca81
update MultiImageSPRMAnndataViewConfBuilder test
NickAkhmetov Aug 9, 2023
dd5dffa
ATACSeqViewConfBuilder test fixed
NickAkhmetov Aug 9, 2023
b147bbe
updated `StitchedCytokitSPRMViewConfBuilder` test
NickAkhmetov Aug 9, 2023
827d689
use obs sets instead of label coordinations to keep tooltips usable, …
NickAkhmetov Aug 9, 2023
acb3948
update seqfish
NickAkhmetov Aug 9, 2023
61f8af6
update spatialrnaseqanndatazarrviewconfigbuilder marker gene test case
NickAkhmetov Aug 9, 2023
031f3af
update remaining test fixtures
NickAkhmetov Aug 9, 2023
0c81a01
comment out currently unused util function to keep it from affecting …
NickAkhmetov Aug 9, 2023
b360c48
Use pending changes from vitessce 3.0.7
NickAkhmetov Aug 10, 2023
6e7b75a
update fixtures
NickAkhmetov Aug 11, 2023
c740420
Removed outdated comment
NickAkhmetov Aug 14, 2023
1a55416
Add `_schema_version` to base builder with a default of 1.0.15
NickAkhmetov Aug 14, 2023
ce8a25d
Add `create_coordination_values` helper util
NickAkhmetov Aug 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"editor.rulers": [100]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this .vscode config (a ruler at column 100) as a visual guide to avoid long comment lines that break the lint check.

}
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.12
0.0.13
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package_dir =
packages = find:
python_requires = >=3.7
install_requires =
vitessce==1.0.9
vitessce==3.0.7
hubmap-commons>=2.0.12
requests>=2.27.1
nbformat==5.1.3
Expand Down
70 changes: 44 additions & 26 deletions src/portal_visualization/builders/anndata_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
VitessceConfig,
AnnDataWrapper,
Component as cm,
CoordinationType,
CoordinationType as ct
)

import numpy as np
import zarr

Expand All @@ -14,6 +15,17 @@
from ..utils import get_conf_cells


RNA_SEQ_ANNDATA_FACTOR_PATHS = [f"obs/{key}" for key in [
"marker_gene_0",
"marker_gene_1",
"marker_gene_2",
"marker_gene_3",
"marker_gene_4"
]]

RNA_SEQ_FACTOR_LABEL_NAMES = [f'Marker Gene {i}' for i in range(len(RNA_SEQ_ANNDATA_FACTOR_PATHS))]


class RNASeqAnnDataZarrViewConfBuilder(ViewConfBuilder):
"""Wrapper class for creating an AnnData-backed view configuration
for "second generation" post-August 2020 RNA-seq data from anndata-to-ui.cwl like
Expand Down Expand Up @@ -50,7 +62,7 @@ def get_conf_cells(self, marker=None):
if f'{zarr_path}/.zgroup' not in file_paths_found:
message = f'RNA-seq assay with uuid {self._uuid} has no .zarr store at {zarr_path}'
raise FileNotFoundError(message)
vc = VitessceConfig(name=self._uuid)
vc = VitessceConfig(name=self._uuid, schema_version=self._schema_version)
adata_url = self._build_assets_url(zarr_path, use_token=False)
# Some of the keys (like marker_genes_for_heatmap) here are from our pipeline
# https://github.com/hubmapconsortium/portal-containers/blob/master/containers/anndata-to-ui
Expand All @@ -65,12 +77,12 @@ def get_conf_cells(self, marker=None):
if (any(['azimuth-annotate' in dag['origin'] for dag in dags])):
if self.is_annotated:
if 'predicted.ASCT.celltype' in z['obs']:
cell_set_obs.append("predicted.ASCT.celltype")
cell_set_obs.append("obs/predicted.ASCT.celltype")
cell_set_obs_names.append("Predicted ASCT Cell Type")
if 'predicted_label' in z['obs']:
cell_set_obs.append("predicted_label")
cell_set_obs.append("obs/predicted_label")
cell_set_obs_names.append("Cell Ontology Annotation")
cell_set_obs.append("leiden")
cell_set_obs.append("obs/leiden")
cell_set_obs_names.append("Leiden")
gene_alias = 'var/hugo_symbol' if 'var' in z and 'hugo_symbol' in z['var'] else None
if (gene_alias is not None and marker is not None):
Expand Down Expand Up @@ -117,22 +129,21 @@ def get_conf_cells(self, marker=None):

dataset = vc.add_dataset(name=self._uuid).add_object(AnnDataWrapper(
adata_url=adata_url,
mappings_obsm=["X_umap"],
mappings_obsm_names=["UMAP"],
spatial_centroid_obsm=("X_spatial" if self._is_spatial else None),
cell_set_obs=cell_set_obs,
cell_set_obs_names=cell_set_obs_names,
expression_matrix="X",
matrix_gene_var_filter="marker_genes_for_heatmap",
factors_obs=[
"marker_gene_0",
"marker_gene_1",
"marker_gene_2",
"marker_gene_3",
"marker_gene_4"
],
obs_feature_matrix_path="X",
initial_feature_filter_path="var/marker_genes_for_heatmap",
obs_set_paths=cell_set_obs,
obs_set_names=cell_set_obs_names,
obs_locations_path="obsm/X_spatial" if self._is_spatial else None,
obs_segmentations_path=None,
obs_embedding_paths=["obsm/X_umap"],
obs_embedding_names=["UMAP"],
obs_embedding_dims=[[0, 1]],
request_init=self._get_request_init(),
gene_alias=gene_alias
feature_labels_path=gene_alias,
coordination_values=None,
gene_alias=gene_alias,
obs_labels_paths=RNA_SEQ_ANNDATA_FACTOR_PATHS,
obs_labels_names=RNA_SEQ_FACTOR_LABEL_NAMES
))

vc = self._setup_anndata_view_config(vc, dataset, marker)
Expand All @@ -142,23 +153,23 @@ def _setup_anndata_view_config(self, vc, dataset, marker=None):
scatterplot = vc.add_view(
cm.SCATTERPLOT, dataset=dataset, mapping="UMAP", x=0, y=0, w=self._scatterplot_w, h=6)
cell_sets = vc.add_view(
cm.CELL_SETS,
cm.OBS_SETS,
dataset=dataset,
x=self._scatterplot_w + self._spatial_w,
y=0,
w=12 - self._scatterplot_w - self._spatial_w,
h=3
)
gene_list = vc.add_view(
cm.GENES,
cm.FEATURE_LIST,
dataset=dataset,
x=self._scatterplot_w + self._spatial_w,
y=4,
w=12 - self._scatterplot_w - self._spatial_w,
h=3
)
cell_sets_expr = vc.add_view(
cm.CELL_SET_EXPRESSION, dataset=dataset, x=7, y=6, w=5, h=4)
cm.OBS_SET_FEATURE_VALUE_DISTRIBUTION, dataset=dataset, x=7, y=6, w=5, h=4)
heatmap = vc.add_view(
cm.HEATMAP, dataset=dataset, x=0, y=6, w=7, h=4)
# Adding heatmap to coordination doesn't do anything,
Expand All @@ -173,11 +184,18 @@ def _setup_anndata_view_config(self, vc, dataset, marker=None):
views = list(filter(lambda v: v is not None, [
cell_sets, gene_list, scatterplot, cell_sets_expr, heatmap, spatial]))

# Link top 5 marker genes
vc.link_views(views,
[ct.OBS_LABELS_TYPE for _ in RNA_SEQ_FACTOR_LABEL_NAMES],
RNA_SEQ_FACTOR_LABEL_NAMES,
allow_multiple_scopes_per_type=True)

# Link user-provided marker gene
if marker:
vc.link_views(
views,
[CoordinationType.GENE_SELECTION, CoordinationType.CELL_COLOR_ENCODING],
[[marker], "geneSelection"]
[ct.FEATURE_SELECTION, ct.OBS_COLOR_ENCODING],
[[marker], 'geneSelection'],
)

return vc
Expand Down Expand Up @@ -210,7 +228,7 @@ def _add_spatial_view(self, dataset, vc):
y=0,
w=self._spatial_w,
h=6)
[cells_layer] = vc.add_coordination('spatialCellsLayer')
[cells_layer] = vc.add_coordination('spatialSegmentationLayer')
cells_layer.set_value(
{
"visible": True,
Expand Down
12 changes: 8 additions & 4 deletions src/portal_visualization/builders/base_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
self._assets_endpoint = assets_endpoint
self._entity = entity
self._files = []
self._schema_version = kwargs.get("schema_version", "1.0.15")

@abstractmethod
def get_conf_cells(self, **kwargs): # pragma: no cover
Expand All @@ -42,15 +43,18 @@ def _replace_url_in_file(self, file):
... entity={ "uuid": "uuid" },
... groups_token='groups_token',
... assets_endpoint='https://example.com')
>>> file = { 'data_type': 'CELLS', 'file_type': 'cells.json', 'rel_path': 'cells.json' }
>>> file = {
... 'file_type': 'cells.json',
... 'rel_path': 'cells.json',
... 'coordination_values': { 'obsType': 'cell' } }
>>> pprint(builder._replace_url_in_file(file))
{'data_type': 'CELLS',\n\
'file_type': 'cells.json',\n\
{'coordination_values': {'obsType': 'cell'},
'file_type': 'cells.json',
'url': 'https://example.com/uuid/cells.json?token=groups_token'}
"""

return {
"data_type": file["data_type"],
"coordination_values": file["coordination_values"],
"file_type": file["file_type"],
"url": self._build_assets_url(file["rel_path"]),
}
Expand Down
4 changes: 2 additions & 2 deletions src/portal_visualization/builders/imaging_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def get_conf_cells(self, **kwargs):
message = f"Image pyramid assay with uuid {self._uuid} has no matching files"
raise FileNotFoundError(message)

vc = VitessceConfig(name="HuBMAP Data Portal")
vc = VitessceConfig(name="HuBMAP Data Portal", schema_version=self._schema_version)
dataset = vc.add_dataset(name="Visualization Files")
images = []
for img_path in found_images:
Expand Down Expand Up @@ -156,7 +156,7 @@ def get_conf_cells(self, **kwargs):
for images in images_by_pos:
image_wrappers = []
pos_name = self._get_pos_name(images[0])
vc = VitessceConfig(name=pos_name)
vc = VitessceConfig(name=pos_name, schema_version=self._schema_version)
dataset = vc.add_dataset(name=pos_name)
sorted_images = sorted(images, key=self._get_hybcycle)
for img_path in sorted_images:
Expand Down
49 changes: 36 additions & 13 deletions src/portal_visualization/builders/scatterplot_builders.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from vitessce import (
VitessceConfig,
Component as cm,
DataType as dt,
FileType as ft,
Component as cm,
)


from ..utils import get_conf_cells
from ..utils import create_coordination_values, get_conf_cells
from ..paths import SCRNA_SEQ_DIR, SCATAC_SEQ_DIR
from .base_builders import ViewConfBuilder

Expand All @@ -27,17 +26,18 @@ def get_conf_cells(self, **kwargs):
message = f'Files for uuid "{self._uuid}" not found as expected: ' \
f'Expected: {file_paths_expected}; Found: {file_paths_found}'
raise FileNotFoundError(message)
vc = VitessceConfig(name="HuBMAP Data Portal")
vc = VitessceConfig(name="HuBMAP Data Portal", schema_version=self._schema_version)
dataset = vc.add_dataset(name="Visualization Files")
# The subclass initializes _files in its __init__ method
for file in self._files:
print(f"Adding file: {file}")
dataset = dataset.add_file(**(self._replace_url_in_file(file)))
vc = self._setup_scatterplot_view_config(vc, dataset)
return get_conf_cells(vc)

def _setup_scatterplot_view_config(self, vc, dataset):
vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP", x=0, y=0, w=9, h=12)
vc.add_view(cm.CELL_SETS, dataset=dataset, x=9, y=0, w=3, h=12)
vc.add_view(cm.OBS_SETS, dataset=dataset, x=9, y=0, w=3, h=12)
return vc


Expand All @@ -53,13 +53,23 @@ def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
self._files = [
{
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json",
"file_type": ft.CELLS_JSON,
"data_type": dt.CELLS,
"file_type": ft.OBS_SEGMENTATIONS_CELLS_JSON,
"coordination_values": create_coordination_values(),
},
{
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json",
"file_type": ft.OBS_LOCATIONS_CELLS_JSON,
"coordination_values": create_coordination_values()
},
{
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json",
"file_type": ft.OBS_EMBEDDING_CELLS_JSON,
"coordination_values": create_coordination_values(embeddingType="UMAP")
},
{
"rel_path": f"{SCRNA_SEQ_DIR}.cell-sets.json",
"file_type": ft.CELL_SETS_JSON,
"data_type": dt.CELL_SETS,
"file_type": ft.OBS_SETS_CELL_SETS_JSON,
"coordination_values": create_coordination_values()
},
]

Expand All @@ -73,17 +83,30 @@ class ATACSeqViewConfBuilder(AbstractScatterplotViewConfBuilder):
def __init__(self, entity, groups_token, assets_endpoint, **kwargs):
super().__init__(entity, groups_token, assets_endpoint, **kwargs)
# All "file" Vitessce objects that do not have wrappers.

self._files = [
{
"rel_path": SCATAC_SEQ_DIR
+ "/umap_coords_clusters.cells.json",
"file_type": ft.CELLS_JSON,
"data_type": dt.CELLS,
"file_type": ft.OBS_SEGMENTATIONS_CELLS_JSON,
"coordination_values": create_coordination_values()
},
{
"rel_path": SCATAC_SEQ_DIR
+ "/umap_coords_clusters.cells.json",
"file_type": ft.OBS_LOCATIONS_CELLS_JSON,
"coordination_values": create_coordination_values()
},
{
"rel_path": SCATAC_SEQ_DIR
+ "/umap_coords_clusters.cells.json",
"file_type": ft.OBS_EMBEDDING_CELLS_JSON,
"coordination_values": create_coordination_values(embeddingType="UMAP")
},
{
"rel_path": SCATAC_SEQ_DIR
+ "/umap_coords_clusters.cell-sets.json",
"file_type": ft.CELL_SETS_JSON,
"data_type": dt.CELL_SETS,
"file_type": ft.OBS_SETS_CELL_SETS_JSON,
"coordination_values": create_coordination_values()
},
]
Loading
Loading