Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parquet support for visium HD #2992

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions scanpy/readwrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def read_visium(
library_id: str | None = None,
load_images: bool | None = True,
source_image_path: Path | str | None = None,
spaceranger_image_path: Path | str | None = None,
) -> AnnData:
"""\
Read 10x-Genomics-formatted visium dataset.
Expand Down Expand Up @@ -383,6 +384,9 @@ def read_visium(
source_image_path
Path to the high-resolution tissue image. Path will be included in
`.uns["spatial"][library_id]["metadata"]["source_image_path"]`.
spaceranger_image_path
Path to the folder containing the spaceranger output hires/lowres tissue images.
If `None`, defaults to the `spatial` subfolder of the provided `path`.

Returns
-------
Expand Down Expand Up @@ -415,6 +419,12 @@ def read_visium(
Spatial spot coordinates, usable as `basis` by :func:`~scanpy.pl.embedding`.
"""
path = Path(path)
# If not provided, assume the hires/lowres images are in the `spatial`
# subdirectory of `path`, next to the other spatial files.
if spaceranger_image_path is None:
spaceranger_image_path = path / "spatial"
else:
spaceranger_image_path = Path(spaceranger_image_path)
adata = read_10x_h5(path / count_file, genome=genome)

adata.uns["spatial"] = dict()
Expand All @@ -432,13 +442,14 @@ def read_visium(
tissue_positions_file = (
path / "spatial/tissue_positions.csv"
if (path / "spatial/tissue_positions.csv").exists()
else path / "spatial/tissue_positions.parquet" if (path / "spatial/tissue_positions.parquet").exists()
else path / "spatial/tissue_positions_list.csv"
)
files = dict(
tissue_positions_file=tissue_positions_file,
scalefactors_json_file=path / "spatial/scalefactors_json.json",
hires_image=path / "spatial/tissue_hires_image.png",
lowres_image=path / "spatial/tissue_lowres_image.png",
hires_image=spaceranger_image_path / "tissue_hires_image.png",
lowres_image=spaceranger_image_path / "tissue_lowres_image.png",
)

# check if files exist, continue if images are missing
Expand Down Expand Up @@ -473,11 +484,16 @@ def read_visium(
}

# read coordinates
positions = pd.read_csv(
files["tissue_positions_file"],
header=0 if tissue_positions_file.name == "tissue_positions.csv" else None,
index_col=0,
)
if files["tissue_positions_file"].name.endswith(".csv"):
positions = pd.read_csv(
files["tissue_positions_file"],
header=0 if tissue_positions_file.name == "tissue_positions.csv" else None,
index_col=0,
)
elif files["tissue_positions_file"].name.endswith(".parquet"):
positions = pd.read_parquet(files["tissue_positions_file"])
# the barcode column needs to be set as the index
positions.set_index("barcode", inplace=True)
positions.columns = [
"in_tissue",
"array_row",
Expand Down