Merge pull request #59 from sagar87/docs/notebook_updates
Docs/notebook updates
MeyerBender authored Jul 1, 2024
2 parents e4dba67 + 6ef4963 commit 28800a3
Showing 13 changed files with 1,456 additions and 989 deletions.
1 change: 1 addition & 0 deletions docs/index.rst
@@ -24,6 +24,7 @@ Welcome to the documentation of spatialproteomics!
notebooks/Slicing
notebooks/Segmentation
notebooks/Plotting
notebooks/ImageProcessing
notebooks/Extracting
notebooks/CellTypePrediction
notebooks/Exporting
453 changes: 313 additions & 140 deletions docs/notebooks/CellTypePrediction.ipynb

Large diffs are not rendered by default.

120 changes: 61 additions & 59 deletions docs/notebooks/ExampleWorkflow.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/notebooks/Exporting.ipynb
@@ -118,7 +118,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Zarr\n",
"## Exporting to Zarr\n",
"This is the easiest file format to work with. It allows you to store and load the xarray objects with a single line of code. \n",
"\n",
"In case there are issues with simply running `ds.to_zarr(\"your_path.zarr\")`, you might need to parse the dtypes correctly. This is a [known issue](https://github.com/pydata/xarray/issues/3476) with xarray and will hopefully be fixed soon."
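
The dtype workaround mentioned in that cell usually amounts to casting object-dtype coordinates and data variables to strings before writing. A minimal sketch, assuming `ds` is the xarray Dataset produced by spatialproteomics and that object dtypes are the only offenders:

import xarray as xr

def to_zarr_safe(ds: xr.Dataset, path: str) -> None:
    """Cast object-dtype coords/variables to str, then write to zarr."""
    ds = ds.copy()
    # coordinates such as channel or label names are often stored as object dtype
    for name in [n for n, c in ds.coords.items() if c.dtype == object]:
        ds = ds.assign_coords({name: ds[name].astype(str)})
    # the same can happen for data variables holding string features
    for name in [n for n, v in ds.data_vars.items() if v.dtype == object]:
        ds[name] = ds[name].astype(str)
    ds.to_zarr(path)

# usage: to_zarr_safe(ds, "your_path.zarr")
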
304 changes: 0 additions & 304 deletions docs/notebooks/Extracting.ipynb

This file was deleted.

394 changes: 394 additions & 0 deletions docs/notebooks/ImageProcessing.ipynb

Large diffs are not rendered by default.

523 changes: 354 additions & 169 deletions docs/notebooks/Plotting.ipynb

Large diffs are not rendered by default.

561 changes: 256 additions & 305 deletions docs/notebooks/Segmentation.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/notebooks/Slicing.ipynb
@@ -5,7 +5,7 @@
"id": "b539ec69-af48-4780-9ab4-0dddc6755e33",
"metadata": {},
"source": [
"# Slicing"
"# Subselecting Data"
]
},
{
@@ -66,7 +66,7 @@
"id": "f4f44d5b-255d-47e6-b275-c0f36d20039b",
"metadata": {},
"source": [
"## Slicing channels and x, y coordinates\n",
"## Slicing Channels and Spatial Coordinates\n",
"\n",
"To slice specific channels of the image we simply use `.pp` accessor together with the familiar bracket `[]` indexing."
]
@@ -362,7 +362,7 @@
"id": "a795cf6b-e946-4471-8baa-765a0728ce44",
"metadata": {},
"source": [
"## Slicing labels\n",
"## Slicing Labels\n",
"\n",
"The labels accessor `.la` allows to select specific cell types by their label number or name."
]
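
A hypothetical sketch of the bracket-based slicing these notebook sections describe; the channel names, pixel ranges, and cell-type labels are illustrative, and the exact spatial-slicing syntax is an assumption rather than confirmed API:

# select one channel, or a list of channels, via the .pp accessor
nuclei = ds.pp["DAPI"]
subset = ds.pp[["CD4", "CD8"]]

# crop to a spatial window in x and y (assumed slice-based syntax)
crop = ds.pp[200:600, 200:600]

# select cell types via the .la accessor, by label number or by name
t_cells = ds.la["T cell"]
label_one = ds.la[1]
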
28 changes: 27 additions & 1 deletion spatialproteomics/la/label.py
@@ -79,7 +79,7 @@ def __getitem__(self, indices):
# removing the old segmentation
obj = obj.drop_vars(Layers.SEGMENTATION)
# adding the new segmentation
obj = obj.pp.add_segmentation(segmentation)
obj = obj.pp.add_segmentation(segmentation, reindex=False)

return obj

@@ -668,6 +668,32 @@ def predict_cell_types_argmax(
def _threshold_label(
self, channel: str, threshold: float, layer_key: str = Layers.INTENSITY, label: Optional[str] = None
):
"""
Apply a threshold to a specific channel in the spatialproteomics object and generate a binary label.
This method is called by the threshold_labels() method.
Parameters:
-----------
channel : str
The name of the channel to apply the threshold to.
threshold : float
The threshold value to use for binarization.
layer_key : str, optional
The key of the layer to apply the threshold to. Defaults to Layers.INTENSITY.
label : str, optional
The name of the label to use for further filtering. Defaults to None.
Returns:
--------
obj : spatialproteomics object
A copy of the spatialproteomics object with the binary label added as a new feature.
Raises:
-------
KeyError
If the specified layer_key is not found in the spatialproteomics object.
If the specified channel is not found in the spatialproteomics object.
"""
if layer_key not in self._obj:
raise KeyError(f'No layer "{layer_key}" found. Please add it first using pp.add_quantification().')

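
A hypothetical usage sketch for the thresholding helper documented above. The docstring names threshold_labels() as the public entry point, but the marker-to-threshold mapping shown here is an assumption about its signature, not confirmed API:

# the quantification/intensity layer must exist first, otherwise a KeyError is raised
ds = ds.pp.add_quantification()

# binarize CD4 intensities at an assumed threshold of 0.5 and attach the result
# as a new binary feature (argument structure is hypothetical)
ds = ds.la.threshold_labels({"CD4": 0.5})
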
2 changes: 2 additions & 0 deletions spatialproteomics/pl/plot.py
@@ -689,6 +689,8 @@ def scatter_labels(
ax.set_ylim([ymin, ymax])
ax.set_xlim([xmin, xmax])

ax.set_aspect("equal") # Set equal aspect ratio for x and y axes

if legend:
legend = self._obj.pl._create_label_legend()
ax.legend(handles=legend, **legend_kwargs).set_zorder(102)
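
A minimal matplotlib illustration of why the added ax.set_aspect("equal") matters: without it, scatter plots of cell centroids get stretched whenever the x and y ranges differ. The coordinates below are synthetic:

import matplotlib.pyplot as plt
import numpy as np

# synthetic centroids spread over a 2000 x 1000 pixel field of view
coords = np.random.rand(500, 2) * [2000, 1000]
x, y = coords.T

fig, ax = plt.subplots()
ax.scatter(x, y, s=2)
ax.set_aspect("equal")  # one pixel in x spans the same screen distance as one in y
plt.show()
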
37 changes: 30 additions & 7 deletions spatialproteomics/pp/preprocessing.py
@@ -360,7 +360,6 @@ def add_observations(
for k, v in table.items():
if Dims.FEATURES in self._obj.coords:
if k in self._obj.coords[Dims.FEATURES] and not return_xarray:
logger.warning(f"Found {k} in _obs. Skipping.")
continue
# when looking at centroids, it could happen that the image has been cropped before
# in this case, the x and y coordinates do not necessarily start at 0
@@ -373,7 +372,6 @@
data.append(v)

if len(data) == 0:
logger.warning("Warning: No properties were added.")
return self._obj

da = xr.DataArray(
@@ -699,6 +697,11 @@ def add_labels_from_dataframe(
xr.Dataset
The updated image container with added labels.
"""
# check if properties are already present
assert (
Layers.PROPERTIES not in self._obj
), "Already found label properties in the object. Please remove them with pp.drop_layers('_properties') first."

if df is None:
cells = self._obj.coords[Dims.CELLS].values
labels = np.ones(len(cells))
Expand Down Expand Up @@ -898,9 +901,29 @@ def restore(self, method="wiener", **kwargs):
)
return xr.merge([obj, normed])

def filter(self, quantile: float = None, intensity: int = None, key_added: Optional[str] = None):
def threshold(self, quantile: float = None, intensity: int = None, key_added: Optional[str] = None):
"""
Apply thresholding to the image layer of the object.
Parameters:
- quantile (float): The quantile value used for thresholding. If provided, the pixels below this quantile will be set to 0.
- intensity (int): The absolute intensity value used for thresholding. If provided, the pixels below this intensity will be set to 0.
- key_added (Optional[str]): The name of the new image layer after thresholding. If not provided, the original image layer will be replaced.
Returns:
- xr.Dataset: The object with the thresholding applied to the image layer.
Raises:
- ValueError: If both quantile and intensity are None or if both quantile and intensity are provided.
"""
if (quantile is None and intensity is None) or (quantile is not None and intensity is not None):
raise ValueError("Please provide a quantile or absolute intensity cut off.")

if Layers.PLOT in self._obj:
logger.warning(
"Please only call pl.colorize() after any preprocessing. Otherwise, the image will not be displayed correctly."
)

# Pull out the image from its corresponding field (by default "_image")
image_layer = self._obj[Layers.IMAGE]

@@ -920,11 +943,11 @@

# calculate intensity
filtered = (image_layer - intensity.reshape(-1, 1, 1)).clip(min=0)
# lower = np.quantile(image_layer.values.reshape(image_layer.values.shape[0], -1), quantile, axis=1)
# filtered = (image_layer - np.expand_dims(np.diag(lower) if lower.ndim > 1 else lower, (1, 2))).clip(min=0)

obj = self._obj.copy()

if key_added is None:
obj = self._obj.drop(Layers.IMAGE)
obj = obj.drop(Layers.IMAGE)

filtered = xr.DataArray(
filtered,
@@ -960,7 +983,7 @@ def apply(self, func: Callable, key: str = Layers.IMAGE, key_added: str = Layers
# apply the function to all channels
obj = self._obj.copy()
layer = obj[key].copy()
processed_layer = xr.apply_ufunc(func, layer)
processed_layer = xr.apply_ufunc(func, layer, kwargs=kwargs)

# adding the modified layer to the object
obj[key_added] = xr.DataArray(
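
A sketch of how the renamed pp.threshold() and the kwargs-forwarding pp.apply() might be called, assuming `ds` is a spatialproteomics dataset; the cut-off values and the use of np.clip are illustrative:

import numpy as np

# suppress pixels below the 0.9 intensity quantile (per the docstring above)
ds = ds.pp.threshold(quantile=0.9)

# alternatively, use an absolute intensity cut-off (quantile and intensity are mutually exclusive)
# ds = ds.pp.threshold(intensity=10)

# pp.apply() now forwards keyword arguments to the supplied function,
# e.g. clipping the image layer to a fixed intensity range
ds = ds.pp.apply(np.clip, a_min=0, a_max=50)
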
14 changes: 14 additions & 0 deletions tests/pp/test_add_labels.py
@@ -1,4 +1,5 @@
import pandas as pd
import pytest

from spatialproteomics.constants import Dims, Features, Labels, Layers

@@ -57,3 +58,16 @@ def test_add_labels(dataset):
assert Dims.LABELS in labeled.coords
assert Features.LABELS in labeled[Layers.OBS].coords[Dims.FEATURES].values
assert 1 in labeled[Layers.OBS].sel(features=Features.LABELS).values


def test_add_labels_existing_labels(dataset_labeled):
# creating a dummy dict
cells = dataset_labeled.coords[Dims.CELLS].values
num_cells = len(cells)
label_dict = dict(zip(cells, ["CT1"] * num_cells))

with pytest.raises(
AssertionError,
match="Already found label properties in the object.",
):
dataset_labeled.pp.add_labels(label_dict)
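
A hypothetical sketch of the workflow this new test enforces: if label properties are already present, remove them first, as the assertion message suggests, before assigning a new label mapping. `dataset_labeled` refers to the fixture used in the test above:

from spatialproteomics.constants import Dims

# build a cell -> cell type mapping ("CT1" mirrors the dummy label in the test)
cells = dataset_labeled.coords[Dims.CELLS].values
label_dict = dict(zip(cells, ["CT1"] * len(cells)))

# dropping the existing properties layer first avoids the AssertionError
relabeled = dataset_labeled.pp.drop_layers("_properties").pp.add_labels(label_dict)
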
