From f2937209801fe86cfbb4aa559a64d7752aaf7013 Mon Sep 17 00:00:00 2001
From: Matthias Fabian Meyer-Bender
Date: Thu, 21 Nov 2024 09:39:29 +0100
Subject: [PATCH] added possibility to set neighborhood names for lists of neighborhoods instead of needing to call it on each neighborhood individually

---
Review notes (free-form text placed after the three-dash marker is not part
of the commit message and is skipped when the patch is applied):

* Layout: this patch had been collapsed onto a few very long lines. One diff
  line per line has been restored; the payload tokens are unchanged and the
  context/added/removed counts match every hunk header and the diffstat.
* NOTE(review): the From: header carries no e-mail address (presumably
  stripped at some point); restore it before feeding this to `git am`.
* API: the parameters are renamed from `neighborhood`/`name` to
  `neighborhoods`/`names`, so any caller passing them by keyword would
  break. TODO confirm no such callers exist.
* Only `list`, dict-keys and dict-values inputs are treated as collections
  (exact `type(...) is ...` comparison). Anything else, e.g. a tuple, is
  wrapped into a one-element list and then rejected by the
  "strings or integers" assertion.
* ID detection uses `isinstance(n, int)`. NOTE(review): numpy integer
  scalars are not `int` instances; confirm whether IDs read from
  `coords["neighborhoods"].values` are expected to be accepted.
* In the integer branch the variable `existing_names` is assigned
  `coords["neighborhoods"].values`, presumably IDs rather than names.
* The new docstring still says "identified by the 'neighborhood'" although
  the parameter is now called `neighborhoods`.
* All validation is done with `assert`, which is skipped under `python -O`.
  The tests match on AssertionError, so switching the exception type would
  require updating them as well.
* test_set_neighborhood_name only checks that the calls do not raise; the
  returned objects are never inspected.

 spatialproteomics/nh/neighborhood.py   | 99 +++++++++++++++++++-------
 tests/nh/test_set_neighborhood_name.py | 48 ++++++++++++-
 2 files changed, 119 insertions(+), 28 deletions(-)

diff --git a/spatialproteomics/nh/neighborhood.py b/spatialproteomics/nh/neighborhood.py
index d3273dd..0767509 100644
--- a/spatialproteomics/nh/neighborhood.py
+++ b/spatialproteomics/nh/neighborhood.py
@@ -486,19 +486,20 @@ def set_neighborhood_colors(
 
         return xr.merge([self._obj.drop_vars(Layers.NH_PROPERTIES), da])
 
-    def set_neighborhood_name(self, neighborhood: Union[int, str], name: str):
+    def set_neighborhood_name(self, neighborhoods: Union[int, str, List], names: Union[str, List]):
         """
-        Set the name of a specific neighborhood.
+        Set the name of one or more neighborhoods.
 
-        This method sets the 'name' of a specific neighborhood identified by the 'neighborhood'.
-        The 'neighborhood' can be either a neighborhood ID or the name of the neighborhood.
+        This method updates the 'name' of specific neighborhoods identified by the 'neighborhood'.
+        The 'neighborhoods' can be a single neighborhood ID or name, or a list of IDs/names. Similarly,
+        the 'names' parameter can be a single string or a list of strings.
 
         Parameters
         ----------
-        neighborhood : int or str
-            The ID or name of the neighborhood whose name will be updated.
-        name : str
-            The new name to be assigned to the specified neighborhood.
+        neighborhoods : int, str, or list
+            The ID(s) or name(s) of the neighborhoods whose names will be updated.
+        names : str or list
+            The new name(s) to be assigned to the specified neighborhoods.
 
         Returns
         -------
@@ -506,32 +507,78 @@ def set_neighborhood_name(self, neighborhood: Union[int, str], name: str):
 
         Notes
         -----
-        - The function converts the 'neighborhood' from its name to the corresponding ID for internal processing.
-        - It updates the name of the neighborhood in the data object to the new 'name'.
+        - When both parameters are lists, their lengths must match.
+        - The function converts each 'neighborhood' from its name to the corresponding ID for internal processing.
+        - It updates the name(s) of the neighborhood(s) in the data object to the new 'name(s)'.
         """
-        # checking that a neighborhood layer is already present
+        # Ensure the neighborhood layer exists
         assert Layers.NH_PROPERTIES in self._obj, "No neighborhood layer found in the data object."
-        # checking if the old neighborhood exists
-        assert (
-            neighborhood in self._obj.nh
-        ), f"Neighborhood {neighborhood} not found. Existing cell types: {self._obj.nh}"
-        # checking if the new label already exists
-        assert name not in self._obj[Layers.NH_PROPERTIES].sel(
-            {Dims.NH_PROPS: Props.NAME}
-        ), f"Neighborhood name {name} already exists."
-
-        # getting the original neighborhood properties
+
+        # checking if the user provided dict_values or dict_keys and turns them into a list if that is the case
+        if type(neighborhoods) is {}.keys().__class__ or type(neighborhoods) is {}.values().__class__:
+            neighborhoods = list(neighborhoods)
+        if type(names) is {}.keys().__class__ or type(names) is {}.values().__class__:
+            names = list(names)
+
+        # Handle single inputs by converting them into lists for uniform processing
+        if not isinstance(neighborhoods, list):
+            neighborhoods = [neighborhoods]
+        if not isinstance(names, list):
+            names = [names]
+
+        # Ensure the lengths of neighborhoods and names match
+        assert len(neighborhoods) == len(
+            names
+        ), f"Mismatch in lengths: {len(neighborhoods)} neighborhoods and {len(names)} names provided."
+
+        # ensure that the neighborhoods are provided as either strings or integers, but not mixed
+        assert all([isinstance(n, str) for n in neighborhoods]) or all(
+            [isinstance(n, int) for n in neighborhoods]
+        ), "Neighborhoods must be provided as either strings or integers, but not mixed."
+
+        # ensure that the names are provided as strings
+        assert all([isinstance(n, str) for n in names]), "Names must be provided as strings."
+
+        # ensure that there are no duplicates in the names
+        assert len(names) == len(set(names)), "Names must be unique."
+
+        # Check that all neighborhoods exist
+        invalid_neighborhoods = [n for n in neighborhoods if n not in self._obj.nh]
+
+        # if the neighborhoods are provided as strings
+        if all([isinstance(n, str) for n in neighborhoods]):
+            existing_names = self._obj[Layers.NH_PROPERTIES].sel({Dims.NH_PROPS: Props.NAME}).values
+            assert not invalid_neighborhoods, (
+                f"Neighborhood(s) {invalid_neighborhoods} not found. " f"Existing neighborhoods: {existing_names}"
+            )
+
+        # if they are provided as integers
+        if all([isinstance(n, int) for n in neighborhoods]):
+            existing_names = self._obj.coords["neighborhoods"].values
+            assert not invalid_neighborhoods, (
+                f"Neighborhood(s) {invalid_neighborhoods} not found. " f"Existing neighborhoods: {existing_names}"
+            )
+
+        # Check that all new names are unique and do not already exist
+        existing_names = set(self._obj[Layers.NH_PROPERTIES].sel({Dims.NH_PROPS: Props.NAME}).values)
+        duplicate_names = [n for n in names if n in existing_names]
+        assert not duplicate_names, f"Neighborhood name(s) {duplicate_names} already exist in the data object."
+
+        # Retrieve the original neighborhood properties
         property_layer = self._obj[Layers.NH_PROPERTIES].copy()
 
-        if isinstance(neighborhood, str):
-            neighborhood = self._obj.nh._neighborhood_name_to_id(neighborhood)
+        for n, new_name in zip(neighborhoods, names):
+            # Convert neighborhood name to ID if necessary
+            if isinstance(n, str):
+                n = self._obj.nh._neighborhood_name_to_id(n)
 
-        property_layer.loc[neighborhood, Props.NAME] = name
+            # Update the name
+            property_layer.loc[n, Props.NAME] = new_name
 
-        # removing the old property layer
+        # Remove the old property layer
         obj = self._obj.pp.drop_layers(Layers.NH_PROPERTIES, drop_obs=False)
 
-        # adding the new property layer
+        # Add the updated property layer
         return xr.merge([property_layer, obj])
 
     def compute_neighborhoods_radius(
diff --git a/tests/nh/test_set_neighborhood_name.py b/tests/nh/test_set_neighborhood_name.py
index e5cd512..8d7c583 100644
--- a/tests/nh/test_set_neighborhood_name.py
+++ b/tests/nh/test_set_neighborhood_name.py
@@ -2,15 +2,59 @@
 
 
 def test_set_neighborhood_name(dataset_neighborhoods):
+    # single values, either as string or as integer
     dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood 1", "Dummy Neighborhood")
     dataset_neighborhoods.nh.set_neighborhood_name(3, "Dummy Neighborhood 2")
 
+    # lists
+    dataset_neighborhoods.nh.set_neighborhood_name(
+        ["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"]
+    )
+    dataset_neighborhoods.nh.set_neighborhood_name([3, 4], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"])
+
+    # dict keys and values
+    tmp_dict = {"Neighborhood 1": "Dummy Neighborhood 5", "Neighborhood 2": "Dummy Neighborhood 6"}
+    dataset_neighborhoods.nh.set_neighborhood_name(tmp_dict.keys(), tmp_dict.values())
+    tmp_dict = {3: "Dummy Neighborhood 7", 4: "Dummy Neighborhood 8"}
+    dataset_neighborhoods.nh.set_neighborhood_name(tmp_dict.keys(), tmp_dict.values())
+
+
+def test_set_neighborhood_name_different_length(dataset_neighborhoods):
+    with pytest.raises(AssertionError, match="Mismatch in lengths"):
+        dataset_neighborhoods.nh.set_neighborhood_name(["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3"])
+
 
 def test_set_neighborhood_name_already_exists(dataset_neighborhoods):
-    with pytest.raises(AssertionError, match="Neighborhood name Neighborhood 2 already exists."):
+    with pytest.raises(AssertionError, match="already exist in the data object."):
         dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood 1", "Neighborhood 2")
 
 
 def test_set_neighborhood_name_not_found(dataset_neighborhoods):
-    with pytest.raises(AssertionError, match="Neighborhood Neighborhood NA not found."):
+    # string
+    with pytest.raises(AssertionError, match="not found. Existing neighborhoods"):
         dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood NA", "Dummy Neighborhood")
+
+    # integer
+    with pytest.raises(AssertionError, match="not found. Existing neighborhoods"):
+        dataset_neighborhoods.nh.set_neighborhood_name(10, "Dummy Neighborhood")
+
+
+def test_set_neighborhood_name_mixed_inputs(dataset_neighborhoods):
+    with pytest.raises(
+        AssertionError, match="Neighborhoods must be provided as either strings or integers, but not mixed."
+    ):
+        dataset_neighborhoods.nh.set_neighborhood_name(
+            [3, "Neighborhood 1"], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"]
+        )
+
+
+def test_set_neighborhood_name_int_name(dataset_neighborhoods):
+    with pytest.raises(AssertionError, match="Names must be provided as strings."):
+        dataset_neighborhoods.nh.set_neighborhood_name(3, 4)
+
+
+def test_set_neighborhood_name_duplicate_names(dataset_neighborhoods):
+    with pytest.raises(AssertionError, match="Names must be unique."):
+        dataset_neighborhoods.nh.set_neighborhood_name(
+            ["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3", "Dummy Neighborhood 3"]
+        )