Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NickAkhmetov/Add support for obs_labels_names and obs_labels_paths arrays #272

Merged
merged 4 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tests/create_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,11 @@ def create_test_anndata_file(h5ad_path):
'exPFC2',
'GABA2'
]
obs_cell_label_arr = [f'{obs_label}-label' for obs_label in obs_celltype_arr]
obs_df = pd.DataFrame(
data=[
{'index': i, 'CellType': ct}
for i, ct in zip(obs_index_arr, obs_celltype_arr)
{'index': i, 'CellType': ct, 'CellLabel': cl}
for i, ct, cl in zip(obs_index_arr, obs_celltype_arr, obs_cell_label_arr)
]
)
obsm = {"X_umap": np.array([[0, 1] for c in obs_index_arr])}
Expand Down
9 changes: 6 additions & 3 deletions tests/test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,19 @@ def test_ome_zarr_with_base_dir(self):

def test_anndata(self):
adata_path = data_path / 'test.h5ad.zarr'
w = AnnDataWrapper(adata_path, obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'], obs_embedding_paths=[
'obsm/X_umap'], obs_embedding_names=['UMAP'])
w = AnnDataWrapper(adata_path,
obs_set_paths=['obs/CellType'], obs_set_names=['Cell Type'],
obs_labels_names=['Cell Label'], obs_labels_paths=['obs/CellLabel'],
obs_embedding_paths=['obsm/X_umap'], obs_embedding_names=['UMAP'])
w.local_dir_uid = 'anndata.zarr'

file_def_creator = w.make_file_def_creator('A', 0)
file_def = file_def_creator('http://localhost:8000')
self.assertEqual(file_def, {'fileType': 'anndata.zarr', 'url': 'http://localhost:8000/A/0/anndata.zarr',
'options': {
'obsEmbedding': [{'path': 'obsm/X_umap', 'embeddingType': 'UMAP', 'dims': [0, 1]}],
'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}]
'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}],
'obsLabels': [{'path': 'obs/CellLabel', 'obsLabelsType': 'Cell Label'}]
}})

def test_anndata_with_base_dir(self):
Expand Down
38 changes: 27 additions & 11 deletions vitessce/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def create_image_json(self, img_url):


class AnnDataWrapper(AbstractWrapper):
def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, **kwargs):
def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None, feature_filter_path=None, initial_feature_filter_path=None, obs_set_paths=None, obs_set_names=None, obs_locations_path=None, obs_segmentations_path=None, obs_embedding_paths=None, obs_embedding_names=None, obs_embedding_dims=None, request_init=None, feature_labels_path=None, obs_labels_path=None, convert_to_dense=True, coordination_values=None, obs_labels_paths=None, obs_labels_names=None, **kwargs):
"""
Wrap an AnnData object by creating an instance of the ``AnnDataWrapper`` class.

Expand All @@ -516,15 +516,17 @@ def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None
:param str feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list.
:param str initial_feature_filter_path: A string like `var/highly_variable` used in conjunction with `obs_feature_matrix_path` if obs_feature_matrix_path points to a subset of `X` of the full `var` list.
:param list[str] obs_set_paths: Column names like `['obs/louvain', 'obs/cellType']` for showing cell sets
:param list[str] obs_set_names: Names to display in place of those in `obs_set_paths`, like `['Louvain', 'Cell Type']
:param list[str] obs_set_names: Names to display in place of those in `obs_set_paths`, like `['Louvain', 'Cell Type']`
:param str obs_locations_path: Column name in `obsm` that contains centroid coordinates for displaying centroids in the spatial viewer
:param str obs_segmentations_path: Column name in `obsm` that contains polygonal coordinates for displaying outlines in the spatial viewer
:param list[str] obs_embedding_paths: Column names like `['obsm/X_umap', 'obsm/X_pca']` for showing scatterplots
:param list[str] obs_embedding_names: Overriding names like `['UMAP', 'PCA'] for displaying above scatterplots
:param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like [[0, 1], [4, 5]] where [0, 1] is just the normal x and y but [4, 5] could be comparing the third and fourth principal components, for example.
:param dict request_init: options to be passed along with every fetch request from the browser, like { "header": { "Authorization": "Bearer dsfjalsdfa1431" } }
:param list[str] obs_embedding_names: Overriding names like `['UMAP', 'PCA']` for displaying above scatterplots
:param list[str] obs_embedding_dims: Dimensions along which to get data for the scatterplot, like `[[0, 1], [4, 5]]` where `[0, 1]` is just the normal x and y but `[4, 5]` could be comparing the third and fourth principal components, for example.
:param dict request_init: options to be passed along with every fetch request from the browser, like `{ "header": { "Authorization": "Bearer dsfjalsdfa1431" } }`
:param str feature_labels_path: The name of a column containing feature labels (e.g., alternate gene symbols), instead of the default index in `var` of the AnnData store.
:param str obs_labels_path: The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store.
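:param str obs_labels_path: (DEPRECATED) The name of a column containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store. Use `obs_labels_paths` and `obs_labels_names` instead. If provided, this value takes precedence over `obs_labels_paths` and `obs_labels_names`, and the final segment of the path is used as the label name. This arg will be removed in a future release.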
:param list[str] obs_labels_paths: The names of columns containing observation labels (e.g., alternate cell IDs), instead of the default index in `obs` of the AnnData store.
:param list[str] obs_labels_names: Optional display names for the columns listed in `obs_labels_paths`, like `['Cell Label']`. If omitted, or if its length differs from that of `obs_labels_paths`, the final segment of each path is used as the name instead.
:param bool convert_to_dense: Whether or not to convert `X` to dense in the zarr store (dense is faster but takes more disk space).
:param coordination_values: Coordination values for the file definition.
:type coordination_values: dict or None
Expand Down Expand Up @@ -559,7 +561,13 @@ def __init__(self, adata_path=None, adata_url=None, obs_feature_matrix_path=None
self._mappings_obsm_dims = obs_embedding_dims
self._request_init = request_init
self._gene_alias = feature_labels_path
self._obs_labels_path = obs_labels_path
# Support legacy provision of single obs labels path
if (obs_labels_path is not None):
self._obs_labels_paths = [obs_labels_path]
self._obs_labels_names = [obs_labels_path.split('/')[-1]]
else:
self._obs_labels_paths = obs_labels_paths
self._obs_labels_names = obs_labels_names
self._convert_to_dense = convert_to_dense
self._coordination_values = coordination_values

Expand Down Expand Up @@ -642,10 +650,18 @@ def get_anndata_zarr(base_url):
options["featureLabels"] = {
"path": self._gene_alias
}
if self._obs_labels_path is not None:
options["obsLabels"] = {
"path": self._obs_labels_path
}
if self._obs_labels_paths is not None:
if self._obs_labels_names is not None and len(self._obs_labels_paths) == len(self._obs_labels_names):
# A name was provided for each path element, so use those values.
names = self._obs_labels_names
else:
# Names were not provided for each path element,
# so fall back to using the final part of each path for the names.
names = [labels_path.split('/')[-1] for labels_path in self._obs_labels_paths]
obs_labels = []
for path, name in zip(self._obs_labels_paths, names):
obs_labels.append({"path": path, "obsLabelsType": name})
options["obsLabels"] = obs_labels
if len(options.keys()) > 0:
obj_file_def = {
"fileType": ft.ANNDATA_ZARR.value,
Expand Down
Loading