Skip to content

Commit

Permalink
Support for H5AD-based AnnData in AnnDataWrapper (#361)
Browse files Browse the repository at this point in the history
* Update AnnDataWrapper to support h5ad files with ref spec json

* Lint

* Fix tests

* Update
  • Loading branch information
keller-mark authored Sep 9, 2024
1 parent 06a04e7 commit 94501cc
Show file tree
Hide file tree
Showing 8 changed files with 290 additions and 12 deletions.
3 changes: 2 additions & 1 deletion docs/data_examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ Data preparation examples

notebooks/data_export_s3
notebooks/data_export_files
notebooks/widget_brain_with_base_dir
notebooks/widget_brain_with_base_dir
notebooks/widget_brain_h5ad
177 changes: 177 additions & 0 deletions docs/notebooks/widget_brain_h5ad.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"nbsphinx": "hidden"
},
"source": [
"# Vitessce Widget Tutorial"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Visualization of single-cell RNA seq data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from os.path import join, isfile, isdir\n",
"from urllib.request import urlretrieve\n",
"from anndata import read_h5ad\n",
"import scanpy as sc\n",
"import json\n",
"\n",
"from vitessce import (\n",
" VitessceConfig,\n",
" Component as cm,\n",
" CoordinationType as ct,\n",
" AnnDataWrapper,\n",
")\n",
"from vitessce.data_utils import (\n",
" generate_h5ad_ref_spec\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 0. Download data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"h5_url = \"https://datasets.cellxgene.cziscience.com/84df8fa1-ab53-43c9-a439-95dcb9148265.h5ad\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"adata_filepath = join(\"data\", \"84df8fa1-ab53-43c9-a439-95dcb9148265.h5ad\")\n",
"if not isfile(adata_filepath):\n",
" os.makedirs(\"data\", exist_ok=True)\n",
" urlretrieve(h5_url, adata_filepath)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Create a Reference Spec JSON file for the H5AD file\n",
"\n",
"In order for Vitessce to load H5AD files, we also need to provide a corresponding [Reference Spec](https://fsspec.github.io/kerchunk/spec.html) JSON file which contains mappings between AnnData object keys and the byte offsets at which those AnnData object values begin within the H5AD file binary contents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_filepath = join(\"data\", \"84df8fa1-ab53-43c9-a439-95dcb9148265.h5ad.reference.json\")\n",
"if not isfile(json_filepath):\n",
" ref_dict = generate_h5ad_ref_spec(h5_url)\n",
" with open(json_filepath, \"w\") as f:\n",
" json.dump(ref_dict, f)"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## 2. Create the Vitessce widget configuration\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vc = VitessceConfig(schema_version=\"1.0.17\", name='Nakshatri et al', description='snRNA-seq analyses of breast tissues of healthy women of diverse genetic ancestry')\n",
"\n",
"dataset = vc.add_dataset(name='84df8fa1').add_object(AnnDataWrapper(\n",
" adata_path=adata_filepath,\n",
" ref_path=json_filepath, # We specify paths to both the H5AD and JSON files\n",
" obs_embedding_paths=[\"obsm/X_wnn.umap\"],\n",
" obs_embedding_names=[\"UMAP\"],\n",
" obs_set_paths=[\"obs/cell_type\"],\n",
" obs_set_names=[\"Cell Type\"],\n",
" obs_feature_matrix_path=\"X\",\n",
" )\n",
")\n",
"\n",
"scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n",
"cell_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n",
"cell_set_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)\n",
"genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n",
"\n",
"vc.layout((scatterplot | cell_sets) / (cell_set_sizes | genes));"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"## 3. Create the widget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vw = vc.widget()\n",
"vw"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "vitessce"
version = "3.3.1"
version = "3.3.2"
authors = [
{ name="Mark Keller", email="mark_keller@hms.harvard.edu" },
]
Expand Down
41 changes: 41 additions & 0 deletions tests/test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,47 @@ def test_anndata_with_base_dir(self):
'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}]
}})

def test_anndata_with_h5ad_and_ref_json(self):
    """
    Check the file definition built for an H5AD file paired with a
    reference spec JSON file, when both are given as paths under data_path.

    The expected definition uses the 'anndata.h5ad' file type, and both the
    data URL and the 'refSpecUrl' option are routed through the
    '/A/0/<local uid>' path on the base URL.
    """
    h5ad_file = data_path / 'test.h5ad'
    ref_spec_file = data_path / 'test.h5ad.ref.json'

    wrapper = AnnDataWrapper(
        h5ad_file,
        ref_path=ref_spec_file,
        obs_set_paths=['obs/CellType'],
        obs_set_names=['Cell Type'],
        obs_labels_names=['Cell Label'],
        obs_labels_paths=['obs/CellLabel'],
        obs_embedding_paths=['obsm/X_umap'],
        obs_embedding_names=['UMAP'],
    )
    # Pin the local identifiers so that the generated URLs are predictable.
    wrapper.local_file_uid = 'anndata.h5ad'
    wrapper.local_ref_uid = 'anndata.reference.json'

    expected_options = {
        'refSpecUrl': 'http://localhost:8000/A/0/anndata.reference.json',
        'obsEmbedding': [{'path': 'obsm/X_umap', 'embeddingType': 'UMAP', 'dims': [0, 1]}],
        'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}],
        'obsLabels': [{'path': 'obs/CellLabel', 'obsLabelsType': 'Cell Label'}],
    }
    expected_file_def = {
        'fileType': 'anndata.h5ad',
        'url': 'http://localhost:8000/A/0/anndata.h5ad',
        'options': expected_options,
    }

    create_file_def = wrapper.make_file_def_creator('A', 0)
    actual_file_def = create_file_def('http://localhost:8000')
    self.assertEqual(actual_file_def, expected_file_def)

def test_anndata_with_h5ad_and_ref_json_with_base_dir(self):
    """
    Check the file definition built for an H5AD file paired with a
    reference spec JSON file, when both are given as relative paths and
    base_dir is set on the wrapper.

    The expected data URL and 'refSpecUrl' option are the base URL followed
    by the relative paths themselves, not the '/A/0/<local uid>' route.
    """
    relative_h5ad = 'test.h5ad'
    relative_ref_spec = 'test.h5ad.ref.json'

    wrapper = AnnDataWrapper(
        relative_h5ad,
        ref_path=relative_ref_spec,
        obs_set_paths=['obs/CellType'],
        obs_set_names=['Cell Type'],
        obs_labels_names=['Cell Label'],
        obs_labels_paths=['obs/CellLabel'],
        obs_embedding_paths=['obsm/X_umap'],
        obs_embedding_names=['UMAP'],
    )
    wrapper.base_dir = data_path
    # The local identifiers are still assigned, although the expected URLs
    # asserted below do not contain them.
    wrapper.local_file_uid = 'anndata.h5ad'
    wrapper.local_ref_uid = 'anndata.reference.json'

    expected_options = {
        'refSpecUrl': 'http://localhost:8000/test.h5ad.ref.json',
        'obsEmbedding': [{'path': 'obsm/X_umap', 'embeddingType': 'UMAP', 'dims': [0, 1]}],
        'obsSets': [{'path': 'obs/CellType', 'name': 'Cell Type'}],
        'obsLabels': [{'path': 'obs/CellLabel', 'obsLabelsType': 'Cell Label'}],
    }
    expected_file_def = {
        'fileType': 'anndata.h5ad',
        'url': 'http://localhost:8000/test.h5ad',
        'options': expected_options,
    }

    create_file_def = wrapper.make_file_def_creator('A', 0)
    actual_file_def = create_file_def('http://localhost:8000')
    self.assertEqual(actual_file_def, expected_file_def)

def test_csv(self):
w = CsvWrapper(
csv_path=data_path / 'test.umap.csv',
Expand Down
1 change: 1 addition & 0 deletions vitessce/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class FileType(DocEnum):
An enum type representing the file format or schema to which a file conforms.
"""
ANNDATA_ZARR = "anndata.zarr", "Joint file type for AnnData objects"
ANNDATA_H5AD = "anndata.h5ad", "Joint file type for AnnData objects"
OBS_EMBEDDING_CSV = 'obsEmbedding.csv', "File type for obsEmbedding values stored in a CSV file"
OBS_LOCATIONS_CSV = 'obsLocations.csv', "File type for obsLocations values stored in a CSV file"
OBS_LABELS_CSV = 'obsLabels.csv', "File type for obsLabels values stored in a CSV file"
Expand Down
1 change: 1 addition & 0 deletions vitessce/data_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
sort_var_axis,
to_diamond,
VAR_CHUNK_SIZE,
generate_h5ad_ref_spec,
)
from .ome import (
rgb_img_to_ome_zarr,
Expand Down
6 changes: 3 additions & 3 deletions vitessce/widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,15 +432,15 @@ class VitessceWidget(anywidget.AnyWidget):

next_port = DEFAULT_PORT

js_package_version = Unicode('3.3.12').tag(sync=True)
js_package_version = Unicode('3.4.10').tag(sync=True)
js_dev_mode = Bool(False).tag(sync=True)
custom_js_url = Unicode('').tag(sync=True)
plugin_esm = List(trait=Unicode(''), default_value=[]).tag(sync=True)
remount_on_uid_change = Bool(True).tag(sync=True)

store_urls = List(trait=Unicode(''), default_value=[]).tag(sync=True)

def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.3.12', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True):
def __init__(self, config, height=600, theme='auto', uid=None, port=None, proxy=False, js_package_version='3.4.10', js_dev_mode=False, custom_js_url='', plugins=None, remount_on_uid_change=True):
"""
Construct a new Vitessce widget.
Expand Down Expand Up @@ -552,7 +552,7 @@ def _plugin_command(self, params, buffers):
# Launch Vitessce using plain HTML representation (no ipywidgets)


def ipython_display(config, height=600, theme='auto', base_url=None, host_name=None, uid=None, port=None, proxy=False, js_package_version='3.3.12', js_dev_mode=False, custom_js_url='', plugin_esm=DEFAULT_PLUGIN_ESM, remount_on_uid_change=True):
def ipython_display(config, height=600, theme='auto', base_url=None, host_name=None, uid=None, port=None, proxy=False, js_package_version='3.4.10', js_dev_mode=False, custom_js_url='', plugin_esm=DEFAULT_PLUGIN_ESM, remount_on_uid_change=True):
from IPython.display import display, HTML
uid_str = "vitessce" + get_uid_str(uid)

Expand Down
Loading

0 comments on commit 94501cc

Please sign in to comment.