From 6f49f977e2a6d0c81355960de1b002f68ff49f1a Mon Sep 17 00:00:00 2001
From: ghaith-mq <ghaith.mqawass@gmail.com>
Date: Thu, 10 Oct 2024 13:34:03 +0000
Subject: [PATCH 1/2] visium.py edited, included functions to calculate
 pixel size and image shape

---
 src/spatialdata_io/readers/visium.py | 193 ++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 1 deletion(-)

diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 22a75855..235d7666 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -22,6 +22,12 @@
 from spatialdata_io._docs import inject_docs
 from spatialdata_io.readers._utils._utils import _read_counts
 
+
+from typing import List, Tuple, Union
+import scanpy as sc 
+import spatialdata as sd
+from enum import Enum
+
 __all__ = ["visium"]
 
 
@@ -241,7 +247,7 @@ def visium(
             transformations={"downscaled_lowres": Identity()},
             rgb=None,
         )
-
+        
     return SpatialData(images=images, shapes=shapes, table=table)
 
 
@@ -277,3 +283,188 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any:
     else:
         raise ValueError(f"Image shape {im.shape} is not supported.")
     return image
+
+
+def get_sdata_res(sdata:SpatialData):
+    '''
+    Retrieve the image resolution from the Visium SpatialData object.
+
+    This function extracts the shape (resolution) of the highest resolution image (scale0) 
+    from the Visium SpatialData object. The shape is returned as the number of channels (c), 
+    height (y), and width (x) in pixels.
+
+    Parameters
+    ----------
+    sdata : SpatialData
+        A SpatialData object containing images and spatial data, with image resolutions stored 
+        in a multi-scale format.
+
+    Returns
+    -------
+    shape : tuple
+        A tuple containing the image shape in the format (channels, height, width):
+        - c: Number of image channels (typically RGB).
+        - y: Image height in pixels.
+        - x: Image width in pixels.
+    '''
+
+    image_name = list(sdata.images.keys())[0]
+    dimensions = sdata.images[image_name]['scale0'].dims
+    shape = dimensions['c'],dimensions['y'],dimensions['x']
+    return shape
+
+class SpotPacking(Enum):
+    """Types of ST spots disposition, 
+    for Orange Crate Packing see:
+    https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide    
+    """
+    ORANGE_CRATE_PACKING = 0
+    GRID_PACKING = 1
+
+
+def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING) -> Tuple[float, int]:
+    """Estimate the pixel size of an image in um/px given a dataframe containing the spot coordinates in that image
+
+    Args:
+        my_df (pd.DataFrame): dataframe containing the coordinates of each spot in an image, it must contain the following columns:
+            ['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_col', 'array_row']
+        inter_spot_dist (float, optional): the distance in um between two adjacent spots on the same row. Defaults to 100.0.
+        packing (SpotPacking, optional): disposition of the spots on the slide. Defaults to SpotPacking.ORANGE_CRATE_PACKING.
+
+    Raises:
+        Exception: if no two spots on the same row can be found
+
+    Returns:
+        Tuple[float, int]: approximation of the pixel size in um/px and over how many spots that pixel size was estimated
+    """
+    def _cart_dist(start_spot, end_spot):
+        """cartesian distance in pixel between two spots"""
+        d = np.sqrt((start_spot['pxl_col_in_fullres'] - end_spot['pxl_col_in_fullres']) ** 2 \
+            + (start_spot['pxl_row_in_fullres'] - end_spot['pxl_row_in_fullres']) ** 2)
+        return d
+    
+    df = my_df.copy()
+    
+    max_dist_col = 0
+    approx_nb = 0
+    best_approx = 0
+    df = df.sort_values('array_row')
+    for _, row in df.iterrows():
+        y = row['array_col']
+        x = row['array_row']
+        if len(df[df['array_row'] == x]) > 1:
+            b = df[df['array_row'] == x]['array_col'].idxmax()
+            start_spot = row
+            end_spot = df.loc[b]
+            dist_px = _cart_dist(start_spot, end_spot)
+            
+            div = 1 if packing == SpotPacking.GRID_PACKING else 2
+            dist_col = abs(df.loc[b, 'array_col'] - y) // div
+            
+            approx_nb += 1
+            
+            if dist_col > max_dist_col:
+                max_dist_col = dist_col
+                best_approx = inter_spot_dist / (dist_px / dist_col)
+            if approx_nb > 3:
+                break
+            
+    if approx_nb == 0:
+        raise Exception("Pixel size estimation failed. Couldn't find two spots on the same row")
+            
+    return best_approx, max_dist_col
+
+def create_df_coord_visium(data: SpatialData):
+    '''
+    Create a DataFrame with coordinates and array indices from Visium SpatialData.
+
+    This function processes the spatial shapes and table data from a Visium SpatialData object 
+    to generate a DataFrame containing pixel coordinates (row and column) for each spot 
+    on the tissue image at full resolution. It also includes the corresponding array row and column 
+    indices from the data tables.
+
+    Parameters
+    ----------
+    data : SpatialData
+        A SpatialData object containing Visium spatial information, including shapes and table data 
+        (spot coordinates and array indices).
+
+    Returns
+    -------
+    df_coord : pandas.DataFrame
+        A DataFrame with the following columns:
+        - 'pxl_row_in_fullres': Pixel row coordinates in full-resolution tissue image.
+        - 'pxl_col_in_fullres': Pixel column coordinates in full-resolution tissue image.
+        - 'array_row': Row index of the spot in the Visium array.
+        - 'array_col': Column index of the spot in the Visium array.
+    '''
+    tissue_name = list(data.shapes.keys())[0]
+    shapes_df = data.shapes[tissue_name]
+    shapes_df['pxl_col_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.x)
+    shapes_df['pxl_row_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.y)
+
+
+    shapes_df['array_row'] = list(data.tables['table'].obs['array_row'])
+    shapes_df['array_col'] =  list(data.tables['table'].obs['array_col'])
+
+    # Now, you have the necessary DataFrame in the correct format:
+    df_coord = shapes_df[['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_row', 'array_col']]
+    return df_coord
+
+
+def calculate_pixel_size_from_visium(
+    path: str, 
+    dataset_id: str,
+    counts_file: str, 
+    fullres_image_file: str, 
+    tissue_positions_file: str, 
+    scalefactors_file: str,
+    inter_spot_dist: float = 100.0
+) -> SpatialData:
+    """
+    Main function to load data into a spatialdata class and calculate scale0 image shape and pixel size.
+
+    Parameters
+    ----------
+    path : str
+        Path to the directory containing the data.
+    dataset_id : str
+        ID of the dataset to use.
+    counts_file : str
+        Path to the filtered feature barcode matrix (counts file).
+    fullres_image_file : str
+        Path to the full-resolution image file (usually tissue_hires_image.png).
+    tissue_positions_file : str
+        Path to the tissue positions file (usually tissue_positions_list.csv).
+    scalefactors_file : str
+        Path to the scalefactors file (usually scalefactors_json.json).
+    inter_spot_dist : float, optional
+        Distance in um between two adjacent spots on the same row of the Visium array. Defaults to 100.0.
+
+    Returns
+    -------
+    visium_sdata : SpatialData
+        SpatialData object that includes both image shape and pixel size stored in tables['table'].uns
+    """
+    
+    # Load the SpatialData object using the visium function. 6 files are expected to be passed from Visium raw data
+    visium_sdata = visium(
+        path=path,
+        dataset_id=dataset_id,
+        counts_file=counts_file,
+        fullres_image_file=fullres_image_file,
+        tissue_positions_file=tissue_positions_file,
+        scalefactors_file=scalefactors_file
+    )
+    
+    df_coord = create_df_coord_visium(visium_sdata)
+
+    pixel_size, _ = find_pixel_size_visium(df_coord, inter_spot_dist)
+    
+    image_shape = get_sdata_res(visium_sdata)
+    
+
+    visium_sdata.tables['table'].uns['image_shape'] = image_shape
+    visium_sdata.tables['table'].uns['pixel_size'] = pixel_size
+    
+    return visium_sdata
\ No newline at end of file

From 450bcd206416ee276230f3b73062164580995cfd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 10 Oct 2024 14:25:36 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata_io/readers/visium.py | 121 ++++++++++++++-------------
 1 file changed, 61 insertions(+), 60 deletions(-)

diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py
index 235d7666..e4d5a5cb 100644
--- a/src/spatialdata_io/readers/visium.py
+++ b/src/spatialdata_io/readers/visium.py
@@ -4,9 +4,10 @@
 import os
 import re
 from collections.abc import Mapping
+from enum import Enum
 from pathlib import Path
 from types import MappingProxyType
-from typing import Any
+from typing import Any, Tuple
 
 import numpy as np
 import pandas as pd
@@ -22,12 +23,6 @@
 from spatialdata_io._docs import inject_docs
 from spatialdata_io.readers._utils._utils import _read_counts
 
-
-from typing import List, Tuple, Union
-import scanpy as sc 
-import spatialdata as sd
-from enum import Enum
-
 __all__ = ["visium"]
 
 
@@ -247,7 +242,7 @@ def visium(
             transformations={"downscaled_lowres": Identity()},
             rgb=None,
         )
-        
+
     return SpatialData(images=images, shapes=shapes, table=table)
 
 
@@ -285,18 +280,18 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any:
     return image
 
 
-def get_sdata_res(sdata:SpatialData):
-    '''
+def get_sdata_res(sdata: SpatialData):
+    """
     Retrieve the image resolution from the Visium SpatialData object.
 
-    This function extracts the shape (resolution) of the highest resolution image (scale0) 
-    from the Visium SpatialData object. The shape is returned as the number of channels (c), 
+    This function extracts the shape (resolution) of the highest resolution image (scale0)
+    from the Visium SpatialData object. The shape is returned as the number of channels (c),
     height (y), and width (x) in pixels.
 
     Parameters
     ----------
     sdata : SpatialData
-        A SpatialData object containing images and spatial data, with image resolutions stored 
+        A SpatialData object containing images and spatial data, with image resolutions stored
         in a multi-scale format.
 
     Returns
@@ -306,23 +301,27 @@ def get_sdata_res(sdata:SpatialData):
         - c: Number of image channels (typically RGB).
         - y: Image height in pixels.
         - x: Image width in pixels.
-    '''
+    """
 
     image_name = list(sdata.images.keys())[0]
-    dimensions = sdata.images[image_name]['scale0'].dims
-    shape = dimensions['c'],dimensions['y'],dimensions['x']
+    dimensions = sdata.images[image_name]["scale0"].dims
+    shape = dimensions["c"], dimensions["y"], dimensions["x"]
     return shape
 
+
 class SpotPacking(Enum):
-    """Types of ST spots disposition, 
+    """Types of ST spots disposition,
     for Orange Crate Packing see:
-    https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide    
+    https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide
     """
+
     ORANGE_CRATE_PACKING = 0
     GRID_PACKING = 1
 
 
-def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING) -> Tuple[float, int]:
+def find_pixel_size_visium(
+    my_df: pd.DataFrame, inter_spot_dist: float = 100.0, packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING
+) -> tuple[float, int]:
     """Estimate the pixel size of an image in um/px given a dataframe containing the spot coordinates in that image
 
     Args:
@@ -337,56 +336,60 @@ def find_pixel_size_visium(my_df: pd.DataFrame, inter_spot_dist: float=100., pac
     Returns:
         Tuple[float, int]: approximation of the pixel size in um/px and over how many spots that pixel size was estimated
     """
+
     def _cart_dist(start_spot, end_spot):
         """cartesian distance in pixel between two spots"""
-        d = np.sqrt((start_spot['pxl_col_in_fullres'] - end_spot['pxl_col_in_fullres']) ** 2 \
-            + (start_spot['pxl_row_in_fullres'] - end_spot['pxl_row_in_fullres']) ** 2)
+        d = np.sqrt(
+            (start_spot["pxl_col_in_fullres"] - end_spot["pxl_col_in_fullres"]) ** 2
+            + (start_spot["pxl_row_in_fullres"] - end_spot["pxl_row_in_fullres"]) ** 2
+        )
         return d
-    
+
     df = my_df.copy()
-    
+
     max_dist_col = 0
     approx_nb = 0
     best_approx = 0
-    df = df.sort_values('array_row')
+    df = df.sort_values("array_row")
     for _, row in df.iterrows():
-        y = row['array_col']
-        x = row['array_row']
-        if len(df[df['array_row'] == x]) > 1:
-            b = df[df['array_row'] == x]['array_col'].idxmax()
+        y = row["array_col"]
+        x = row["array_row"]
+        if len(df[df["array_row"] == x]) > 1:
+            b = df[df["array_row"] == x]["array_col"].idxmax()
             start_spot = row
             end_spot = df.loc[b]
             dist_px = _cart_dist(start_spot, end_spot)
-            
+
             div = 1 if packing == SpotPacking.GRID_PACKING else 2
-            dist_col = abs(df.loc[b, 'array_col'] - y) // div
-            
+            dist_col = abs(df.loc[b, "array_col"] - y) // div
+
             approx_nb += 1
-            
+
             if dist_col > max_dist_col:
                 max_dist_col = dist_col
                 best_approx = inter_spot_dist / (dist_px / dist_col)
             if approx_nb > 3:
                 break
-            
+
     if approx_nb == 0:
         raise Exception("Pixel size estimation failed. Couldn't find two spots on the same row")
-            
+
     return best_approx, max_dist_col
 
+
 def create_df_coord_visium(data: SpatialData):
-    '''
+    """
     Create a DataFrame with coordinates and array indices from Visium SpatialData.
 
-    This function processes the spatial shapes and table data from a Visium SpatialData object 
-    to generate a DataFrame containing pixel coordinates (row and column) for each spot 
-    on the tissue image at full resolution. It also includes the corresponding array row and column 
+    This function processes the spatial shapes and table data from a Visium SpatialData object
+    to generate a DataFrame containing pixel coordinates (row and column) for each spot
+    on the tissue image at full resolution. It also includes the corresponding array row and column
     indices from the data tables.
 
     Parameters
     ----------
     data : SpatialData
-        A SpatialData object containing Visium spatial information, including shapes and table data 
+        A SpatialData object containing Visium spatial information, including shapes and table data
         (spot coordinates and array indices).
 
     Returns
@@ -397,29 +400,28 @@ def create_df_coord_visium(data: SpatialData):
         - 'pxl_col_in_fullres': Pixel column coordinates in full-resolution tissue image.
         - 'array_row': Row index of the spot in the Visium array.
         - 'array_col': Column index of the spot in the Visium array.
-    '''
+    """
     tissue_name = list(data.shapes.keys())[0]
     shapes_df = data.shapes[tissue_name]
-    shapes_df['pxl_col_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.x)
-    shapes_df['pxl_row_in_fullres'] = shapes_df.geometry.apply(lambda geom: geom.y)
-
+    shapes_df["pxl_col_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.x)
+    shapes_df["pxl_row_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.y)
 
-    shapes_df['array_row'] = list(data.tables['table'].obs['array_row'])
-    shapes_df['array_col'] =  list(data.tables['table'].obs['array_col'])
+    shapes_df["array_row"] = list(data.tables["table"].obs["array_row"])
+    shapes_df["array_col"] = list(data.tables["table"].obs["array_col"])
 
     # Now, you have the necessary DataFrame in the correct format:
-    df_coord = shapes_df[['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_row', 'array_col']]
+    df_coord = shapes_df[["pxl_row_in_fullres", "pxl_col_in_fullres", "array_row", "array_col"]]
     return df_coord
 
 
 def calculate_pixel_size_from_visium(
-    path: str, 
+    path: str,
     dataset_id: str,
-    counts_file: str, 
-    fullres_image_file: str, 
-    tissue_positions_file: str, 
+    counts_file: str,
+    fullres_image_file: str,
+    tissue_positions_file: str,
     scalefactors_file: str,
-    inter_spot_dist: float = 100.0
+    inter_spot_dist: float = 100.0,
 ) -> SpatialData:
     """
     Main function to load data into a spatialdata class and calculate scale0 image shape and pixel size.
@@ -446,7 +448,7 @@ def calculate_pixel_size_from_visium(
     visium_sdata : SpatialData
         SpatialData object that includes both image shape and pixel size stored in tables['table'].uns
     """
-    
+
     # Load the SpatialData object using the visium function. 6 files are expected to be passed from Visium raw data
     visium_sdata = visium(
         path=path,
@@ -454,17 +456,16 @@ def calculate_pixel_size_from_visium(
         counts_file=counts_file,
         fullres_image_file=fullres_image_file,
         tissue_positions_file=tissue_positions_file,
-        scalefactors_file=scalefactors_file
+        scalefactors_file=scalefactors_file,
     )
-    
+
     df_coord = create_df_coord_visium(visium_sdata)
 
     pixel_size, _ = find_pixel_size_visium(df_coord, inter_spot_dist)
-    
+
     image_shape = get_sdata_res(visium_sdata)
-    
 
-    visium_sdata.tables['table'].uns['image_shape'] = image_shape
-    visium_sdata.tables['table'].uns['pixel_size'] = pixel_size
-    
-    return visium_sdata
\ No newline at end of file
+    visium_sdata.tables["table"].uns["image_shape"] = image_shape
+    visium_sdata.tables["table"].uns["pixel_size"] = pixel_size
+
+    return visium_sdata