From 6f49f977e2a6d0c81355960de1b002f68ff49f1a Mon Sep 17 00:00:00 2001 From: ghaith-mq Date: Thu, 10 Oct 2024 13:34:03 +0000 Subject: [PATCH 1/2] visium.py edited, included functions to calculate of pixel size and image shape --- src/spatialdata_io/readers/visium.py | 193 ++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 22a75855..235d7666 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -22,6 +22,12 @@ from spatialdata_io._docs import inject_docs from spatialdata_io.readers._utils._utils import _read_counts + +from typing import List, Tuple, Union +import scanpy as sc +import spatialdata as sd +from enum import Enum + __all__ = ["visium"] @@ -241,7 +247,7 @@ def visium( transformations={"downscaled_lowres": Identity()}, rgb=None, ) - + return SpatialData(images=images, shapes=shapes, table=table) @@ -277,3 +283,188 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any: else: raise ValueError(f"Image shape {im.shape} is not supported.") return image + + +def get_sdata_res(sdata:SpatialData): + ''' + Retrieve the image resolution from the Visium SpatialData object. + + This function extracts the shape (resolution) of the highest resolution image (scale0) + from the Visium SpatialData object. The shape is returned as the number of channels (c), + height (y), and width (x) in pixels. + + Parameters + ---------- + sdata : SpatialData + A SpatialData object containing images and spatial data, with image resolutions stored + in a multi-scale format. + + Returns + ------- + shape : tuple + A tuple containing the image shape in the format (channels, height, width): + - c: Number of image channels (typically RGB). + - y: Image height in pixels. + - x: Image width in pixels. + ''' + + image_name = list(sdata.images.keys())[0] + dimensions = sdata.images[image_name]['scale0'].dims + shape = dimensions['c'],dimensions['y'],dimensions['x'] + return shape + +class SpotPacking(Enum): + """Types of ST spots disposition, + for Orange Crate Packing see: + https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide + """ + ORANGE_CRATE_PACKING = 0 + GRID_PACKING = 1 + + +def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING) -> Tuple[float, int]: + """Estimate the pixel size of an image in um/px given a dataframe containing the spot coordinates in that image + + Args: + my_df (pd.DataFrame): dataframe containing the coordinates of each spot in an image, it must contain the following columns: + ['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_col', 'array_row'] + inter_spot_dist (float, optional): the distance in um between two spots on the same row. Defaults to 100.. + packing (SpotPacking, optional): disposition of the spots on the slide. Defaults to SpotPacking.ORANGE_CRATE_PACKING. + + Raises: + Exception: if cannot find two spots on the same row + + Returns: + Tuple[float, int]: approximation of the pixel size in um/px and over how many spots that pixel size was estimated + """ + def _cart_dist(start_spot, end_spot): + """cartesian distance in pixel between two spots""" + d = np.sqrt((start_spot['pxl_col_in_fullres'] - end_spot['pxl_col_in_fullres']) ** 2 \ + + (start_spot['pxl_row_in_fullres'] - end_spot['pxl_row_in_fullres']) ** 2) + return d + + df = my_df.copy() + + max_dist_col = 0 + approx_nb = 0 + best_approx = 0 + df = df.sort_values('array_row') + for _, row in df.iterrows(): + y = row['array_col'] + x = row['array_row'] + if len(df[df['array_row'] == x]) > 1: + b = df[df['array_row'] == x]['array_col'].idxmax() + start_spot = row + end_spot = df.loc[b] + dist_px = _cart_dist(start_spot, end_spot) + + div = 1 if packing == SpotPacking.GRID_PACKING else 2 + dist_col = abs(df.loc[b, 'array_col'] - y) // div + + approx_nb += 1 + + if dist_col > max_dist_col: + max_dist_col = dist_col + best_approx = inter_spot_dist / (dist_px / dist_col) + if approx_nb > 3: + break + + if approx_nb == 0: + raise Exception("Pixel size estimation failed. Couldn't find two spots on the same row") + + return best_approx, max_dist_col + +def create_df_coord_visium(data: SpatialData): + ''' + Create a DataFrame with coordinates and array indices from Visium SpatialData. + + This function processes the spatial shapes and table data from a Visium SpatialData object + to generate a DataFrame containing pixel coordinates (row and column) for each spot + on the tissue image at full resolution. It also includes the corresponding array row and column + indices from the data tables. + + Parameters + ---------- + data : SpatialData + A SpatialData object containing Visium spatial information, including shapes and table data + (spot coordinates and array indices). + + Returns + ------- + df_coord : pandas.DataFrame + A DataFrame with the following columns: + - 'pxl_row_in_fullres': Pixel row coordinates in full-resolution tissue image. + - 'pxl_col_in_fullres': Pixel column coordinates in full-resolution tissue image. + - 'array_row': Row index of the spot in the Visium array. + - 'array_col': Column index of the spot in the Visium array. + ''' + tissue_name = list(data.shapes.keys())[0] + shapes_df = data.shapes[tissue_name] + shapes_df['pxl_col_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.x) + shapes_df['pxl_row_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.y) + + + shapes_df['array_row'] = list(data.tables['table'].obs['array_row']) + shapes_df['array_col'] = list(data.tables['table'].obs['array_col']) + + # Now, you have the necessary DataFrame in the correct format: + df_coord = shapes_df[['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_row', 'array_col']] + return df_coord + + +def calculate_pixel_size_from_visium( + path: str, + dataset_id: str, + counts_file: str, + fullres_image_file: str, + tissue_positions_file: str, + scalefactors_file: str, + inter_spot_dist: float = 100.0 +) -> SpatialData: + """ + Main function to load data into a spatialdata class and calculate scale0 image shape and pixel size. + + Parameters + ---------- + path : str + Path to the directory containing the data. + dataset_id : str + ID of the dataset to use. + counts_file : str + Path to the filtered feature barcode matrix (counts file). + fullres_image_file : str + Path to the full-resolution image file (usually tissue_hires_image.png). + tissue_positions_file : str + Path to the tissue positions file (usually tissue_positions_list.csv). + scalefactors_file : str + Path to the scalefactors file (usually scalefactors_json.json). + inter_spot_dist : float, optional + Distance between 2 spots in a visium field. Default value = 100um. + + Returns + ------- + visium_sdata : SpatialData + SpatialData object that includes both image shape and pixel size stored in tables['table'].uns + """ + + # Load the SpatialData object using the visium function. 6 files are expected to be passed from Visium raw data + visium_sdata = visium( + path=path, + dataset_id=dataset_id, + counts_file=counts_file, + fullres_image_file=fullres_image_file, + tissue_positions_file=tissue_positions_file, + scalefactors_file=scalefactors_file + ) + + df_coord = create_df_coord_visium(visium_sdata) + + pixel_size, _ = find_pixel_size_visium(df_coord, inter_spot_dist) + + image_shape = get_sdata_res(visium_sdata) + + + visium_sdata.tables['table'].uns['image_shape'] = image_shape + visium_sdata.tables['table'].uns['pixel_size'] = pixel_size + + return visium_sdata \ No newline at end of file From 450bcd206416ee276230f3b73062164580995cfd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:25:36 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/readers/visium.py | 121 ++++++++++++++------------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 235d7666..e4d5a5cb 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -4,9 +4,10 @@ import os import re from collections.abc import Mapping +from enum import Enum from pathlib import Path from types import MappingProxyType -from typing import Any +from typing import Any, Tuple import numpy as np import pandas as pd @@ -22,12 +23,6 @@ from spatialdata_io._docs import inject_docs from spatialdata_io.readers._utils._utils import _read_counts - -from typing import List, Tuple, Union -import scanpy as sc -import spatialdata as sd -from enum import Enum - __all__ = ["visium"] @@ -247,7 +242,7 @@ def visium( transformations={"downscaled_lowres": Identity()}, rgb=None, ) - + return SpatialData(images=images, shapes=shapes, table=table) @@ -285,18 +280,18 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any: return image -def get_sdata_res(sdata:SpatialData): - ''' +def get_sdata_res(sdata: SpatialData): + """ Retrieve the image resolution from the Visium SpatialData object. - This function extracts the shape (resolution) of the highest resolution image (scale0) - from the Visium SpatialData object. The shape is returned as the number of channels (c), + This function extracts the shape (resolution) of the highest resolution image (scale0) + from the Visium SpatialData object. The shape is returned as the number of channels (c), height (y), and width (x) in pixels. Parameters ---------- sdata : SpatialData - A SpatialData object containing images and spatial data, with image resolutions stored + A SpatialData object containing images and spatial data, with image resolutions stored in a multi-scale format. Returns @@ -306,23 +301,27 @@ def get_sdata_res(sdata:SpatialData): - c: Number of image channels (typically RGB). - y: Image height in pixels. - x: Image width in pixels. - ''' + """ image_name = list(sdata.images.keys())[0] - dimensions = sdata.images[image_name]['scale0'].dims - shape = dimensions['c'],dimensions['y'],dimensions['x'] + dimensions = sdata.images[image_name]["scale0"].dims + shape = dimensions["c"], dimensions["y"], dimensions["x"] return shape + class SpotPacking(Enum): - """Types of ST spots disposition, + """Types of ST spots disposition, for Orange Crate Packing see: - https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide + https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide """ + ORANGE_CRATE_PACKING = 0 GRID_PACKING = 1 -def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING) -> Tuple[float, int]: +def find_pixel_size_visium( + my_df: pd.DataFrame, inter_spot_dist: float = 100.0, packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING +) -> tuple[float, int]: """Estimate the pixel size of an image in um/px given a dataframe containing the spot coordinates in that image Args: @@ -337,56 +336,60 @@ def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., pac Returns: Tuple[float, int]: approximation of the pixel size in um/px and over how many spots that pixel size was estimated """ + def _cart_dist(start_spot, end_spot): """cartesian distance in pixel between two spots""" - d = np.sqrt((start_spot['pxl_col_in_fullres'] - end_spot['pxl_col_in_fullres']) ** 2 \ - + (start_spot['pxl_row_in_fullres'] - end_spot['pxl_row_in_fullres']) ** 2) + d = np.sqrt( + (start_spot["pxl_col_in_fullres"] - end_spot["pxl_col_in_fullres"]) ** 2 + + (start_spot["pxl_row_in_fullres"] - end_spot["pxl_row_in_fullres"]) ** 2 + ) return d - + df = my_df.copy() - + max_dist_col = 0 approx_nb = 0 best_approx = 0 - df = df.sort_values('array_row') + df = df.sort_values("array_row") for _, row in df.iterrows(): - y = row['array_col'] - x = row['array_row'] - if len(df[df['array_row'] == x]) > 1: - b = df[df['array_row'] == x]['array_col'].idxmax() + y = row["array_col"] + x = row["array_row"] + if len(df[df["array_row"] == x]) > 1: + b = df[df["array_row"] == x]["array_col"].idxmax() start_spot = row end_spot = df.loc[b] dist_px = _cart_dist(start_spot, end_spot) - + div = 1 if packing == SpotPacking.GRID_PACKING else 2 - dist_col = abs(df.loc[b, 'array_col'] - y) // div - + dist_col = abs(df.loc[b, "array_col"] - y) // div + approx_nb += 1 - + if dist_col > max_dist_col: max_dist_col = dist_col best_approx = inter_spot_dist / (dist_px / dist_col) if approx_nb > 3: break - + if approx_nb == 0: raise Exception("Pixel size estimation failed. Couldn't find two spots on the same row") - + return best_approx, max_dist_col + def create_df_coord_visium(data: SpatialData): - ''' + """ Create a DataFrame with coordinates and array indices from Visium SpatialData. - This function processes the spatial shapes and table data from a Visium SpatialData object - to generate a DataFrame containing pixel coordinates (row and column) for each spot - on the tissue image at full resolution. It also includes the corresponding array row and column + This function processes the spatial shapes and table data from a Visium SpatialData object + to generate a DataFrame containing pixel coordinates (row and column) for each spot + on the tissue image at full resolution. It also includes the corresponding array row and column indices from the data tables. Parameters ---------- data : SpatialData - A SpatialData object containing Visium spatial information, including shapes and table data + A SpatialData object containing Visium spatial information, including shapes and table data (spot coordinates and array indices). Returns @@ -397,29 +400,28 @@ def create_df_coord_visium(data: SpatialData): - 'pxl_col_in_fullres': Pixel column coordinates in full-resolution tissue image. - 'array_row': Row index of the spot in the Visium array. - 'array_col': Column index of the spot in the Visium array. - ''' + """ tissue_name = list(data.shapes.keys())[0] shapes_df = data.shapes[tissue_name] - shapes_df['pxl_col_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.x) - shapes_df['pxl_row_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.y) - + shapes_df["pxl_col_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.x) + shapes_df["pxl_row_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.y) - shapes_df['array_row'] = list(data.tables['table'].obs['array_row']) - shapes_df['array_col'] = list(data.tables['table'].obs['array_col']) + shapes_df["array_row"] = list(data.tables["table"].obs["array_row"]) + shapes_df["array_col"] = list(data.tables["table"].obs["array_col"]) # Now, you have the necessary DataFrame in the correct format: - df_coord = shapes_df[['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_row', 'array_col']] + df_coord = shapes_df[["pxl_row_in_fullres", "pxl_col_in_fullres", "array_row", "array_col"]] return df_coord def calculate_pixel_size_from_visium( - path: str, + path: str, dataset_id: str, - counts_file: str, - fullres_image_file: str, - tissue_positions_file: str, + counts_file: str, + fullres_image_file: str, + tissue_positions_file: str, scalefactors_file: str, - inter_spot_dist: float = 100.0 + inter_spot_dist: float = 100.0, ) -> SpatialData: """ Main function to load data into a spatialdata class and calculate scale0 image shape and pixel size. @@ -446,7 +448,7 @@ def calculate_pixel_size_from_visium( visium_sdata : SpatialData SpatialData object that includes both image shape and pixel size stored in tables['table'].uns """ - + # Load the SpatialData object using the visium function. 6 files are expected to be passed from Visium raw data visium_sdata = visium( path=path, @@ -454,17 +456,16 @@ def calculate_pixel_size_from_visium( counts_file=counts_file, fullres_image_file=fullres_image_file, tissue_positions_file=tissue_positions_file, - scalefactors_file=scalefactors_file + scalefactors_file=scalefactors_file, ) - + df_coord = create_df_coord_visium(visium_sdata) pixel_size, _ = find_pixel_size_visium(df_coord, inter_spot_dist) - + image_shape = get_sdata_res(visium_sdata) - - visium_sdata.tables['table'].uns['image_shape'] = image_shape - visium_sdata.tables['table'].uns['pixel_size'] = pixel_size - - return visium_sdata \ No newline at end of file + visium_sdata.tables["table"].uns["image_shape"] = image_shape + visium_sdata.tables["table"].uns["pixel_size"] = pixel_size + + return visium_sdata