Skip to content

Commit

Permalink
added possibility to set neighborhood names for lists of neighborhoods instead of needing to call it on each neighborhood individually
Browse files Browse the repository at this point in the history
  • Loading branch information
MeyerBender committed Nov 21, 2024
1 parent aa6214c commit f293720
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 28 deletions.
99 changes: 73 additions & 26 deletions spatialproteomics/nh/neighborhood.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,52 +486,99 @@ def set_neighborhood_colors(

return xr.merge([self._obj.drop_vars(Layers.NH_PROPERTIES), da])

def set_neighborhood_name(self, neighborhood: Union[int, str], name: str):
def set_neighborhood_name(self, neighborhoods: Union[int, str, List], names: Union[str, List]):
"""
Set the name of a specific neighborhood.
Set the name of one or more neighborhoods.
This method sets the 'name' of a specific neighborhood identified by the 'neighborhood'.
The 'neighborhood' can be either a neighborhood ID or the name of the neighborhood.
This method updates the 'name' of the neighborhoods identified by the 'neighborhoods' parameter.
The 'neighborhoods' can be a single neighborhood ID or name, or a list of IDs/names. Similarly,
the 'names' parameter can be a single string or a list of strings.
Parameters
----------
neighborhood : int or str
The ID or name of the neighborhood whose name will be updated.
name : str
The new name to be assigned to the specified neighborhood.
neighborhoods : int, str, or list
The ID(s) or name(s) of the neighborhoods whose names will be updated.
names : str or list
The new name(s) to be assigned to the specified neighborhoods.
Returns
-------
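xr.Dataset
The data object with the neighborhood properties layer replaced by a copy carrying the updated name(s).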
Notes
-----
- The function converts the 'neighborhood' from its name to the corresponding ID for internal processing.
- It updates the name of the neighborhood in the data object to the new 'name'.
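- When both parameters are lists, their lengths must match.
- Dictionary key/value views (e.g. `mapping.keys()`, `mapping.values()`) are accepted and converted to lists.
- Neighborhoods must be given either all as names (strings) or all as IDs (integers), not mixed.
- The new names must be strings, unique among themselves, and must not already exist in the data object.
- Neighborhoods given by name are converted to the corresponding ID for internal processing.
- The name(s) of the neighborhood(s) in the data object are then updated to the new name(s).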
"""
# checking that a neighborhood layer is already present
# Ensure the neighborhood layer exists
assert Layers.NH_PROPERTIES in self._obj, "No neighborhood layer found in the data object."
# checking if the old neighborhood exists
assert (
neighborhood in self._obj.nh
), f"Neighborhood {neighborhood} not found. Existing cell types: {self._obj.nh}"
# checking if the new label already exists
assert name not in self._obj[Layers.NH_PROPERTIES].sel(
{Dims.NH_PROPS: Props.NAME}
), f"Neighborhood name {name} already exists."

# getting the original neighborhood properties

# if the user provided dict_keys or dict_values, turn them into a list
if type(neighborhoods) is {}.keys().__class__ or type(neighborhoods) is {}.values().__class__:
neighborhoods = list(neighborhoods)
if type(names) is {}.keys().__class__ or type(names) is {}.values().__class__:
names = list(names)

# Handle single inputs by converting them into lists for uniform processing
if not isinstance(neighborhoods, list):
neighborhoods = [neighborhoods]
if not isinstance(names, list):
names = [names]

# Ensure the lengths of neighborhoods and names match
assert len(neighborhoods) == len(
names
), f"Mismatch in lengths: {len(neighborhoods)} neighborhoods and {len(names)} names provided."

# ensure that the neighborhoods are provided as either strings or integers, but not mixed
assert all([isinstance(n, str) for n in neighborhoods]) or all(
[isinstance(n, int) for n in neighborhoods]
), "Neighborhoods must be provided as either strings or integers, but not mixed."

# ensure that the names are provided as strings
assert all([isinstance(n, str) for n in names]), "Names must be provided as strings."

# ensure that there are no duplicates in the names
assert len(names) == len(set(names)), "Names must be unique."

# Check that all neighborhoods exist
invalid_neighborhoods = [n for n in neighborhoods if n not in self._obj.nh]

# if the neighborhoods are provided as strings
if all([isinstance(n, str) for n in neighborhoods]):
existing_names = self._obj[Layers.NH_PROPERTIES].sel({Dims.NH_PROPS: Props.NAME}).values
assert not invalid_neighborhoods, (
f"Neighborhood(s) {invalid_neighborhoods} not found. " f"Existing neighborhoods: {existing_names}"
)

# if they are provided as integers
if all([isinstance(n, int) for n in neighborhoods]):
existing_names = self._obj.coords["neighborhoods"].values
assert not invalid_neighborhoods, (
f"Neighborhood(s) {invalid_neighborhoods} not found. " f"Existing neighborhoods: {existing_names}"
)

# Check that none of the new names already exist in the data object (uniqueness among the new names was checked above)
existing_names = set(self._obj[Layers.NH_PROPERTIES].sel({Dims.NH_PROPS: Props.NAME}).values)
duplicate_names = [n for n in names if n in existing_names]
assert not duplicate_names, f"Neighborhood name(s) {duplicate_names} already exist in the data object."

# Retrieve the original neighborhood properties
property_layer = self._obj[Layers.NH_PROPERTIES].copy()

if isinstance(neighborhood, str):
neighborhood = self._obj.nh._neighborhood_name_to_id(neighborhood)
for n, new_name in zip(neighborhoods, names):
# Convert neighborhood name to ID if necessary
if isinstance(n, str):
n = self._obj.nh._neighborhood_name_to_id(n)

property_layer.loc[neighborhood, Props.NAME] = name
# Update the name
property_layer.loc[n, Props.NAME] = new_name

# removing the old property layer
# Remove the old property layer
obj = self._obj.pp.drop_layers(Layers.NH_PROPERTIES, drop_obs=False)

# adding the new property layer
# Add the updated property layer
return xr.merge([property_layer, obj])

def compute_neighborhoods_radius(
Expand Down
48 changes: 46 additions & 2 deletions tests/nh/test_set_neighborhood_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,59 @@


def test_set_neighborhood_name(dataset_neighborhoods):
# single values, either as string or as integer
dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood 1", "Dummy Neighborhood")
dataset_neighborhoods.nh.set_neighborhood_name(3, "Dummy Neighborhood 2")

# lists
dataset_neighborhoods.nh.set_neighborhood_name(
["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"]
)
dataset_neighborhoods.nh.set_neighborhood_name([3, 4], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"])

# dict keys and values
tmp_dict = {"Neighborhood 1": "Dummy Neighborhood 5", "Neighborhood 2": "Dummy Neighborhood 6"}
dataset_neighborhoods.nh.set_neighborhood_name(tmp_dict.keys(), tmp_dict.values())
tmp_dict = {3: "Dummy Neighborhood 7", 4: "Dummy Neighborhood 8"}
dataset_neighborhoods.nh.set_neighborhood_name(tmp_dict.keys(), tmp_dict.values())


def test_set_neighborhood_name_different_length(dataset_neighborhoods):
with pytest.raises(AssertionError, match="Mismatch in lengths"):
dataset_neighborhoods.nh.set_neighborhood_name(["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3"])


def test_set_neighborhood_name_already_exists(dataset_neighborhoods):
with pytest.raises(AssertionError, match="Neighborhood name Neighborhood 2 already exists."):
with pytest.raises(AssertionError, match="already exist in the data object."):
dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood 1", "Neighborhood 2")


def test_set_neighborhood_name_not_found(dataset_neighborhoods):
with pytest.raises(AssertionError, match="Neighborhood Neighborhood NA not found."):
# string
with pytest.raises(AssertionError, match="not found. Existing neighborhoods"):
dataset_neighborhoods.nh.set_neighborhood_name("Neighborhood NA", "Dummy Neighborhood")

# integer
with pytest.raises(AssertionError, match="not found. Existing neighborhoods"):
dataset_neighborhoods.nh.set_neighborhood_name(10, "Dummy Neighborhood")


def test_set_neighborhood_name_mixed_inputs(dataset_neighborhoods):
with pytest.raises(
AssertionError, match="Neighborhoods must be provided as either strings or integers, but not mixed."
):
dataset_neighborhoods.nh.set_neighborhood_name(
[3, "Neighborhood 1"], ["Dummy Neighborhood 3", "Dummy Neighborhood 4"]
)


def test_set_neighborhood_name_int_name(dataset_neighborhoods):
with pytest.raises(AssertionError, match="Names must be provided as strings."):
dataset_neighborhoods.nh.set_neighborhood_name(3, 4)


def test_set_neighborhood_name_duplicate_names(dataset_neighborhoods):
with pytest.raises(AssertionError, match="Names must be unique."):
dataset_neighborhoods.nh.set_neighborhood_name(
["Neighborhood 1", "Neighborhood 2"], ["Dummy Neighborhood 3", "Dummy Neighborhood 3"]
)

0 comments on commit f293720

Please sign in to comment.