-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NickAkhmetov/HMP-159 Upgrade vitessce-python dependency #81
Changes from 25 commits
d89851b
7b15721
458f8e7
5a6c25c
32611d4
e50b202
574645b
56175b0
60998fc
a7a021c
0912a8b
374ab32
9a94352
2fcd89f
f63ca81
dd5dffa
b147bbe
827d689
acb3948
61f8af6
031f3af
0c81a01
b360c48
6e7b75a
c740420
1a55416
ce8a25d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"editor.rulers": [100] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.0.12 | ||
0.0.13 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,8 +4,9 @@ | |
VitessceConfig, | ||
AnnDataWrapper, | ||
Component as cm, | ||
CoordinationType, | ||
CoordinationType as ct | ||
) | ||
|
||
import numpy as np | ||
import zarr | ||
|
||
|
@@ -14,6 +15,17 @@ | |
from ..utils import get_conf_cells | ||
|
||
|
||
RNA_SEQ_ANNDATA_FACTOR_PATHS = [f"obs/{key}" for key in [ | ||
"marker_gene_0", | ||
"marker_gene_1", | ||
"marker_gene_2", | ||
"marker_gene_3", | ||
"marker_gene_4" | ||
]] | ||
|
||
RNA_SEQ_FACTOR_LABEL_NAMES = [f'Marker Gene {i}' for i in range(len(RNA_SEQ_ANNDATA_FACTOR_PATHS))] | ||
|
||
|
||
class RNASeqAnnDataZarrViewConfBuilder(ViewConfBuilder): | ||
"""Wrapper class for creating an AnnData-backed view configuration | ||
for "second generation" post-August 2020 RNA-seq data from anndata-to-ui.cwl like | ||
|
@@ -50,7 +62,7 @@ def get_conf_cells(self, marker=None): | |
if f'{zarr_path}/.zgroup' not in file_paths_found: | ||
message = f'RNA-seq assay with uuid {self._uuid} has no .zarr store at {zarr_path}' | ||
raise FileNotFoundError(message) | ||
vc = VitessceConfig(name=self._uuid) | ||
vc = VitessceConfig(name=self._uuid, schema_version='1.0.15') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we define There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great call! I've added this as a private field in the |
||
adata_url = self._build_assets_url(zarr_path, use_token=False) | ||
# Some of the keys (like marker_genes_for_heatmap) here are from our pipeline | ||
# https://github.com/hubmapconsortium/portal-containers/blob/master/containers/anndata-to-ui | ||
|
@@ -65,12 +77,12 @@ def get_conf_cells(self, marker=None): | |
if (any(['azimuth-annotate' in dag['origin'] for dag in dags])): | ||
if self.is_annotated: | ||
if 'predicted.ASCT.celltype' in z['obs']: | ||
cell_set_obs.append("predicted.ASCT.celltype") | ||
cell_set_obs.append("obs/predicted.ASCT.celltype") | ||
cell_set_obs_names.append("Predicted ASCT Cell Type") | ||
if 'predicted_label' in z['obs']: | ||
cell_set_obs.append("predicted_label") | ||
cell_set_obs.append("obs/predicted_label") | ||
cell_set_obs_names.append("Cell Ontology Annotation") | ||
cell_set_obs.append("leiden") | ||
cell_set_obs.append("obs/leiden") | ||
cell_set_obs_names.append("Leiden") | ||
gene_alias = 'var/hugo_symbol' if 'var' in z and 'hugo_symbol' in z['var'] else None | ||
if (gene_alias is not None and marker is not None): | ||
|
@@ -117,22 +129,21 @@ def get_conf_cells(self, marker=None): | |
|
||
dataset = vc.add_dataset(name=self._uuid).add_object(AnnDataWrapper( | ||
adata_url=adata_url, | ||
mappings_obsm=["X_umap"], | ||
mappings_obsm_names=["UMAP"], | ||
spatial_centroid_obsm=("X_spatial" if self._is_spatial else None), | ||
cell_set_obs=cell_set_obs, | ||
cell_set_obs_names=cell_set_obs_names, | ||
expression_matrix="X", | ||
matrix_gene_var_filter="marker_genes_for_heatmap", | ||
factors_obs=[ | ||
"marker_gene_0", | ||
"marker_gene_1", | ||
"marker_gene_2", | ||
"marker_gene_3", | ||
"marker_gene_4" | ||
], | ||
obs_feature_matrix_path="X", | ||
initial_feature_filter_path="var/marker_genes_for_heatmap", | ||
obs_set_paths=cell_set_obs, | ||
obs_set_names=cell_set_obs_names, | ||
obs_locations_path="obsm/X_spatial" if self._is_spatial else None, | ||
obs_segmentations_path=None, | ||
obs_embedding_paths=["obsm/X_umap"], | ||
obs_embedding_names=["UMAP"], | ||
obs_embedding_dims=[[0, 1]], | ||
request_init=self._get_request_init(), | ||
gene_alias=gene_alias | ||
feature_labels_path=gene_alias, | ||
coordination_values=None, | ||
gene_alias=gene_alias, | ||
obs_labels_paths=RNA_SEQ_ANNDATA_FACTOR_PATHS, | ||
obs_labels_names=RNA_SEQ_FACTOR_LABEL_NAMES | ||
)) | ||
|
||
vc = self._setup_anndata_view_config(vc, dataset, marker) | ||
|
@@ -142,23 +153,23 @@ def _setup_anndata_view_config(self, vc, dataset, marker=None): | |
scatterplot = vc.add_view( | ||
cm.SCATTERPLOT, dataset=dataset, mapping="UMAP", x=0, y=0, w=self._scatterplot_w, h=6) | ||
cell_sets = vc.add_view( | ||
cm.CELL_SETS, | ||
cm.OBS_SETS, | ||
dataset=dataset, | ||
x=self._scatterplot_w + self._spatial_w, | ||
y=0, | ||
w=12 - self._scatterplot_w - self._spatial_w, | ||
h=3 | ||
) | ||
gene_list = vc.add_view( | ||
cm.GENES, | ||
cm.FEATURE_LIST, | ||
dataset=dataset, | ||
x=self._scatterplot_w + self._spatial_w, | ||
y=4, | ||
w=12 - self._scatterplot_w - self._spatial_w, | ||
h=3 | ||
) | ||
cell_sets_expr = vc.add_view( | ||
cm.CELL_SET_EXPRESSION, dataset=dataset, x=7, y=6, w=5, h=4) | ||
cm.OBS_SET_FEATURE_VALUE_DISTRIBUTION, dataset=dataset, x=7, y=6, w=5, h=4) | ||
heatmap = vc.add_view( | ||
cm.HEATMAP, dataset=dataset, x=0, y=6, w=7, h=4) | ||
# Adding heatmap to coordination doesn't do anything, | ||
|
@@ -173,11 +184,18 @@ def _setup_anndata_view_config(self, vc, dataset, marker=None): | |
views = list(filter(lambda v: v is not None, [ | ||
cell_sets, gene_list, scatterplot, cell_sets_expr, heatmap, spatial])) | ||
|
||
# Link top 5 marker genes | ||
vc.link_views(views, | ||
[ct.OBS_LABELS_TYPE for _ in RNA_SEQ_FACTOR_LABEL_NAMES], | ||
RNA_SEQ_FACTOR_LABEL_NAMES, | ||
allow_multiple_scopes_per_type=True) | ||
|
||
# Link user-provided marker gene | ||
if marker: | ||
vc.link_views( | ||
views, | ||
[CoordinationType.GENE_SELECTION, CoordinationType.CELL_COLOR_ENCODING], | ||
[[marker], "geneSelection"] | ||
[ct.FEATURE_SELECTION, ct.OBS_COLOR_ENCODING], | ||
[[marker], 'geneSelection'], | ||
) | ||
|
||
return vc | ||
|
@@ -210,7 +228,7 @@ def _add_spatial_view(self, dataset, vc): | |
y=0, | ||
w=self._spatial_w, | ||
h=6) | ||
[cells_layer] = vc.add_coordination('spatialCellsLayer') | ||
[cells_layer] = vc.add_coordination('spatialSegmentationLayer') | ||
cells_layer.set_value( | ||
{ | ||
"visible": True, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,7 @@ | ||
from vitessce import ( | ||
VitessceConfig, | ||
Component as cm, | ||
DataType as dt, | ||
FileType as ft, | ||
Component as cm, | ||
) | ||
|
||
|
||
|
@@ -27,17 +26,18 @@ def get_conf_cells(self, **kwargs): | |
message = f'Files for uuid "{self._uuid}" not found as expected: ' \ | ||
f'Expected: {file_paths_expected}; Found: {file_paths_found}' | ||
raise FileNotFoundError(message) | ||
vc = VitessceConfig(name="HuBMAP Data Portal") | ||
vc = VitessceConfig(name="HuBMAP Data Portal", schema_version="1.0.15") | ||
dataset = vc.add_dataset(name="Visualization Files") | ||
# The subclass initializes _files in its __init__ method | ||
for file in self._files: | ||
print(f"Adding file: {file}") | ||
dataset = dataset.add_file(**(self._replace_url_in_file(file))) | ||
vc = self._setup_scatterplot_view_config(vc, dataset) | ||
return get_conf_cells(vc) | ||
|
||
def _setup_scatterplot_view_config(self, vc, dataset): | ||
vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP", x=0, y=0, w=9, h=12) | ||
vc.add_view(cm.CELL_SETS, dataset=dataset, x=9, y=0, w=3, h=12) | ||
vc.add_view(cm.OBS_SETS, dataset=dataset, x=9, y=0, w=3, h=12) | ||
return vc | ||
|
||
|
||
|
@@ -53,13 +53,32 @@ def __init__(self, entity, groups_token, assets_endpoint, **kwargs): | |
self._files = [ | ||
{ | ||
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json", | ||
"file_type": ft.CELLS_JSON, | ||
"data_type": dt.CELLS, | ||
"file_type": ft.OBS_SEGMENTATIONS_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could it be worth introducing a util or similar for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added a
|
||
}, | ||
{ | ||
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json", | ||
"file_type": ft.OBS_LOCATIONS_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
}, | ||
{ | ||
"rel_path": f"{SCRNA_SEQ_DIR}.cells.json", | ||
"file_type": ft.OBS_EMBEDDING_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
"embeddingType": "UMAP", | ||
}, | ||
}, | ||
{ | ||
"rel_path": f"{SCRNA_SEQ_DIR}.cell-sets.json", | ||
"file_type": ft.CELL_SETS_JSON, | ||
"data_type": dt.CELL_SETS, | ||
"file_type": ft.OBS_SETS_CELL_SETS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
}, | ||
] | ||
|
||
|
@@ -73,17 +92,39 @@ class ATACSeqViewConfBuilder(AbstractScatterplotViewConfBuilder): | |
def __init__(self, entity, groups_token, assets_endpoint, **kwargs): | ||
super().__init__(entity, groups_token, assets_endpoint, **kwargs) | ||
# All "file" Vitessce objects that do not have wrappers. | ||
|
||
self._files = [ | ||
{ | ||
"rel_path": SCATAC_SEQ_DIR | ||
+ "/umap_coords_clusters.cells.json", | ||
"file_type": ft.CELLS_JSON, | ||
"data_type": dt.CELLS, | ||
"file_type": ft.OBS_SEGMENTATIONS_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
}, | ||
{ | ||
"rel_path": SCATAC_SEQ_DIR | ||
+ "/umap_coords_clusters.cells.json", | ||
"file_type": ft.OBS_LOCATIONS_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
}, | ||
{ | ||
"rel_path": SCATAC_SEQ_DIR | ||
+ "/umap_coords_clusters.cells.json", | ||
"file_type": ft.OBS_EMBEDDING_CELLS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
"embeddingType": "UMAP", | ||
}, | ||
}, | ||
{ | ||
"rel_path": SCATAC_SEQ_DIR | ||
+ "/umap_coords_clusters.cell-sets.json", | ||
"file_type": ft.CELL_SETS_JSON, | ||
"data_type": dt.CELL_SETS, | ||
"file_type": ft.OBS_SETS_CELL_SETS_JSON, | ||
"coordination_values": { | ||
"obsType": "cell", | ||
}, | ||
}, | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this .vscode config as a guide to avoid long comment lines that broke lint