From 8e23732c36c46c036870f972f96405b290b16bb3 Mon Sep 17 00:00:00 2001 From: ktpolanski Date: Tue, 9 Apr 2024 13:01:13 +0000 Subject: [PATCH 1/2] parquet support for visium HD --- scanpy/readwrite.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scanpy/readwrite.py b/scanpy/readwrite.py index 9bac25bf4e..c64849c4fd 100644 --- a/scanpy/readwrite.py +++ b/scanpy/readwrite.py @@ -432,6 +432,7 @@ def read_visium( tissue_positions_file = ( path / "spatial/tissue_positions.csv" if (path / "spatial/tissue_positions.csv").exists() + else path / "spatial/tissue_positions.parquet" if (path / "spatial/tissue_positions.parquet").exists() else path / "spatial/tissue_positions_list.csv" ) files = dict( @@ -473,11 +474,16 @@ def read_visium( } # read coordinates - positions = pd.read_csv( - files["tissue_positions_file"], - header=0 if tissue_positions_file.name == "tissue_positions.csv" else None, - index_col=0, - ) + if files["tissue_positions_file"].name.endswith(".csv"): + positions = pd.read_csv( + files["tissue_positions_file"], + header=0 if tissue_positions_file.name == "tissue_positions.csv" else None, + index_col=0, + ) + elif files["tissue_positions_file"].name.endswith(".parquet"): + positions = pd.read_parquet(files["tissue_positions_file"]) + #need to set the barcode to be the index + positions.set_index("barcode", inplace=True) positions.columns = [ "in_tissue", "array_row", From 2dd3c9ed268bc291117cc5276d056da2f19bef3a Mon Sep 17 00:00:00 2001 From: ktpolanski Date: Mon, 15 Jul 2024 13:06:23 +0000 Subject: [PATCH 2/2] spaceranger_image_path to account for split off spatial folder --- scanpy/readwrite.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scanpy/readwrite.py b/scanpy/readwrite.py index c64849c4fd..168420f719 100644 --- a/scanpy/readwrite.py +++ b/scanpy/readwrite.py @@ -356,6 +356,7 @@ def read_visium( library_id: str | None = None, load_images: bool | None = True, 
source_image_path: Path | str | None = None, + spaceranger_image_path: Path | str | None = None, ) -> AnnData: """\ Read 10x-Genomics-formatted visum dataset. @@ -383,6 +384,9 @@ def read_visium( source_image_path Path to the high-resolution tissue image. Path will be included in `.uns["spatial"][library_id]["metadata"]["source_image_path"]`. + spaceranger_image_path + Path to the folder containing the spaceranger output images `tissue_hires_image.png` and + `tissue_lowres_image.png`. If `None`, defaults to the `spatial` subfolder of `path`. Returns ------- @@ -415,6 +419,12 @@ def read_visium( Spatial spot coordinates, usable as `basis` by :func:`~scanpy.pl.embedding`. """ path = Path(path) + # If no image folder is given, fall back to the `spatial` subdirectory of `path`, + # which is where the other spatial files are read from. + if spaceranger_image_path is None: + spaceranger_image_path = path / "spatial" + else: + spaceranger_image_path = Path(spaceranger_image_path) adata = read_10x_h5(path / count_file, genome=genome) adata.uns["spatial"] = dict() @@ -438,8 +448,8 @@ def read_visium( files = dict( tissue_positions_file=tissue_positions_file, scalefactors_json_file=path / "spatial/scalefactors_json.json", - hires_image=path / "spatial/tissue_hires_image.png", - lowres_image=path / "spatial/tissue_lowres_image.png", + hires_image=spaceranger_image_path / "tissue_hires_image.png", + lowres_image=spaceranger_image_path / "tissue_lowres_image.png", ) # check if files exists, continue if images are missing