From 4d6fa632530c7b6d95af7b020cab592fc6db0008 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Thu, 13 Jun 2024 10:01:24 -0400 Subject: [PATCH 1/4] use negative crops + better neutoglancer --- .../datasplits/datasets/arrays/__init__.py | 1 + .../datasets/arrays/concat_array.py | 63 +++ .../datasets/arrays/constant_array.py | 487 ++++++++++++++++++ .../datasets/arrays/constant_array_config.py | 33 ++ .../datasets/arrays/logical_or_array.py | 75 ++- .../datasplits/datasets/arrays/ones_array.py | 4 + .../datasets/arrays/resampled_array.py | 9 +- .../datasplits/datasets/dataset.py | 20 +- .../datasplits/datasplit_generator.py | 98 +++- 9 files changed, 734 insertions(+), 56 deletions(-) create mode 100644 dacapo/experiments/datasplits/datasets/arrays/constant_array.py create mode 100644 dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py index 63d6d6e21..74091aba0 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py +++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py @@ -22,3 +22,4 @@ # nonconfigurable arrays (helpers) from .numpy_array import NumpyArray # noqa +from .constant_array_config import ConstantArray, ConstantArrayConfig # noqa diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index 2cea77a00..dec570731 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -459,3 +459,66 @@ def __getitem__(self, roi: Roi) -> np.ndarray: f"Concatenated array has only one channel: {self.name} {concatenated.shape}" ) return concatenated + + def _can_neuroglance(self): + """ + This method returns True if the source array can be visualized in neuroglance. + + Returns: + bool: True if the source array can be visualized in neuroglance. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._can_neuroglance() + Note: + This method is used to return True if the source array can be visualized in neuroglance. + """ + return any([source_array._can_neuroglance() for source_array in self.source_arrays.values()]) + + def _neuroglancer_source(self): + """ + This method returns the source array for neuroglancer. + + Returns: + neuroglancer.LocalVolume: The source array for neuroglancer. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._neuroglancer_source() + Note: + This method is used to return the source array for neuroglancer. + """ + # return self._source_array._neuroglancer_source() + return [source_array._neuroglancer_source() for source_array in self.source_arrays.values()] + + def _neuroglancer_layer(self): + """ + This method returns the neuroglancer layer for the source array. + + Returns: + neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._neuroglancer_layer() + Note: + This method is used to return the neuroglancer layer for the source array. + """ + # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) + return [source_array._neuroglancer_layer() for source_array in self.source_arrays.values() if source_array._can_neuroglance()] + + def _source_name(self): + """ + This method returns the name of the source array. + + Returns: + str: The name of the source array. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._source_name() + Note: + This method is used to return the name of the source array. + """ + # return self._source_array._source_name() + return [source_array._source_name() for source_array in self.source_arrays.values() if source_array._can_neuroglance()] diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py new file mode 100644 index 000000000..c5cb77eea --- /dev/null +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py @@ -0,0 +1,487 @@ +from .array import Array + +from funlib.geometry import Roi + +import numpy as np +import neuroglancer + + +class ConstantArray(Array): + """ + This is a wrapper around another `source_array` that simply provides constant value + with the same metadata as the `source_array`. + + This is useful for creating a mask array that is the same size as the + original array, but with all values set to 1. + + Attributes: + source_array: The source array that this array is based on. + Methods: + like: Create a new ConstantArray with the same metadata as another array. + attrs: Get the attributes of the array. + axes: Get the axes of the array. + dims: Get the dimensions of the array. + voxel_size: Get the voxel size of the array. + roi: Get the region of interest of the array. + writable: Check if the array is writable. + data: Get the data of the array. + dtype: Get the data type of the array. + num_channels: Get the number of channels of the array. + __getitem__: Get a subarray of the array. + Note: + This class is not meant to be instantiated directly. Instead, use the + `like` method to create a new ConstantArray with the same metadata as + another array. + """ + + def __init__(self, array_config): + """ + Initialize the ConstantArray with the given array configuration. + + Args: + array_config: The configuration of the source array. + Raises: + RuntimeError: If the source array is not specified in the + configuration. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import ArrayConfig + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> from funlib.geometry import Roi + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> source_array_config = ArrayConfig(source_array) + >>> ones_array = ConstantArray(source_array_config) + >>> ones_array.source_array + NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) + Notes: + This class is not meant to be instantiated directly. Instead, use the + `like` method to create a new ConstantArray with the same metadata as + another array. + """ + self._source_array = array_config.source_array_config.array_type( + array_config.source_array_config + ) + self._constant = array_config.constant + + @classmethod + def like(cls, array: Array): + """ + Create a new ConstantArray with the same metadata as another array. + + Args: + array: The source array. + Returns: + The new ConstantArray with the same metadata as the source array. + Raises: + RuntimeError: If the source array is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray.like(source_array) + >>> ones_array.source_array + NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) + Notes: + This class is not meant to be instantiated directly. Instead, use the + `like` method to create a new ConstantArray with the same metadata as + another array. + + """ + instance = cls.__new__(cls) + instance._source_array = array + return instance + + @property + def attrs(self): + """ + Get the attributes of the array. + + Returns: + An empty dictionary. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.attrs + {} + Notes: + This method is used to get the attributes of the array. The attributes + are stored as key-value pairs in a dictionary. This method returns an + empty dictionary because the ConstantArray does not have any attributes. + """ + return dict() + + @property + def source_array(self) -> Array: + """ + Get the source array that this array is based on. + + Returns: + The source array. + Raises: + RuntimeError: If the source array is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.source_array + NumpyArray(data=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]), voxel_size=(1.0, 1.0, 1.0), roi=Roi((0, 0, 0), (10, 10, 10)), num_channels=1) + Notes: + This method is used to get the source array that this array is based on. + The source array is the array that the ConstantArray is created from. This + method returns the source array that was specified when the ConstantArray + was created. + """ + return self._source_array + + @property + def axes(self): + """ + Get the axes of the array. + + Returns: + The axes of the array. + Raises: + RuntimeError: If the axes are not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.axes + 'zyx' + Notes: + This method is used to get the axes of the array. The axes are the + order of the dimensions of the array. This method returns the axes of + the array that was specified when the ConstantArray was created. + """ + return self.source_array.axes + + @property + def dims(self): + """ + Get the dimensions of the array. + + Returns: + The dimensions of the array. + Raises: + RuntimeError: If the dimensions are not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.dims + (10, 10, 10) + Notes: + This method is used to get the dimensions of the array. The dimensions + are the size of the array along each axis. This method returns the + dimensions of the array that was specified when the ConstantArray was created. + """ + return self.source_array.dims + + @property + def voxel_size(self): + """ + Get the voxel size of the array. + + Returns: + The voxel size of the array. + Raises: + RuntimeError: If the voxel size is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.voxel_size + (1.0, 1.0, 1.0) + Notes: + This method is used to get the voxel size of the array. The voxel size + is the size of each voxel in the array. This method returns the voxel + size of the array that was specified when the ConstantArray was created. + """ + return self.source_array.voxel_size + + @property + def roi(self): + """ + Get the region of interest of the array. + + Returns: + The region of interest of the array. + Raises: + RuntimeError: If the region of interest is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> from funlib.geometry import Roi + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.roi + Roi((0, 0, 0), (10, 10, 10)) + Notes: + This method is used to get the region of interest of the array. The + region of interest is the region of the array that contains the data. + This method returns the region of interest of the array that was specified + when the ConstantArray was created. + """ + return self.source_array.roi + + @property + def writable(self) -> bool: + """ + Check if the array is writable. + + Returns: + False. + Raises: + RuntimeError: If the writability of the array is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.writable + False + Notes: + This method is used to check if the array is writable. An array is + writable if it can be modified in place. This method returns False + because the ConstantArray is read-only and cannot be modified. + """ + return False + + @property + def data(self): + """ + Get the data of the array. + + Returns: + The data of the array. + Raises: + RuntimeError: If the data is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.data + array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) + Notes: + This method is used to get the data of the array. The data is the + values that are stored in the array. This method returns a subarray + of the array with all values set to 1. + """ + raise RuntimeError("Cannot get writable version of this data!") + + @property + def dtype(self): + """ + Get the data type of the array. + + Returns: + The data type of the array. + Raises: + RuntimeError: If the data type is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.dtype + + Notes: + This method is used to get the data type of the array. The data type + is the type of the values that are stored in the array. This method + returns the data type of the array that was specified when the ConstantArray + was created. + """ + return bool + + @property + def num_channels(self): + """ + Get the number of channels of the array. + + Returns: + The number of channels of the array. + Raises: + RuntimeError: If the number of channels is not specified. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> ones_array.num_channels + 1 + Notes: + This method is used to get the number of channels of the array. The + number of channels is the number of values that are stored at each + voxel in the array. This method returns the number of channels of the + array that was specified when the ConstantArray was created. + """ + return self.source_array.num_channels + + def __getitem__(self, roi: Roi) -> np.ndarray: + """ + Get a subarray of the array. + + Args: + roi: The region of interest. + Returns: + A subarray of the array with all values set to 1. + Examples: + >>> from dacapo.experiments.datasplits.datasets.arrays import ConstantArray + >>> from dacapo.experiments.datasplits.datasets.arrays import NumpyArray + >>> from funlib.geometry import Roi + >>> import numpy as np + >>> source_array = NumpyArray(np.zeros((10, 10, 10))) + >>> ones_array = ConstantArray(source_array) + >>> roi = Roi((0, 0, 0), (10, 10, 10)) + >>> ones_array[roi] + array([[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]]) + Notes: + This method is used to get a subarray of the array. The subarray is + specified by the region of interest. This method returns a subarray + of the array with all values set to 1. + """ + return np.ones_like(self.source_array.__getitem__(roi), dtype=bool) * self._constant + + def _can_neuroglance(self): + """ + This method returns True if the source array can be visualized in neuroglance. + + Returns: + bool: True if the source array can be visualized in neuroglance. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._can_neuroglance() + Note: + This method is used to return True if the source array can be visualized in neuroglance. + """ + return True + + def _neuroglancer_source(self): + """ + This method returns the source array for neuroglancer. + + Returns: + neuroglancer.LocalVolume: The source array for neuroglancer. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._neuroglancer_source() + Note: + This method is used to return the source array for neuroglancer. + """ + # return self._source_array._neuroglancer_source() + return np.ones_like(self.source_array.data, dtype=np.uint64) * self._constant + + def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: + """ + Combines dimensions and metadata from self._source_array._neuroglancer_source() + with data from self._neuroglancer_source(). + + Returns: + neuroglancer.LocalVolume: The combined neuroglancer source. + """ + source_array_volume = self._source_array._neuroglancer_source() + result_data = self._neuroglancer_source() + + return neuroglancer.LocalVolume( + data=result_data, + dimensions=source_array_volume.dimensions, + voxel_offset=source_array_volume.voxel_offset, + ) + + def _neuroglancer_layer(self): + """ + This method returns the neuroglancer layer for the source array. + + Returns: + neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._neuroglancer_layer() + Note: + This method is used to return the neuroglancer layer for the source array. + """ + # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) + return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source()) + + def _source_name(self): + """ + This method returns the name of the source array. + + Returns: + str: The name of the source array. + Raises: + ValueError: If the source array is not writable. + Examples: + >>> binarize_array._source_name() + Note: + This method is used to return the name of the source array. + """ + # return self._source_array._source_name() + return f"{self._constant}_of_{self.source_array._source_name()}" \ No newline at end of file diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py new file mode 100644 index 000000000..9c8521d42 --- /dev/null +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py @@ -0,0 +1,33 @@ +import attr + +from .array_config import ArrayConfig +from .constant_array import ConstantArray + + +@attr.s +class ConstantArrayConfig(ArrayConfig): + """ + This array read data from the source array and then return a np.ones_like() version. + + This is useful for creating a mask array from a source array. For example, if you have a + 2D array of data and you want to create a mask array that is the same shape as the data + array, you can use this class to create the mask array. + + Attributes: + source_array_config: The source array that you want to copy and fill with ones. + Methods: + create_array: Create the array. + Note: + This class is a subclass of ArrayConfig. + """ + + array_type = ConstantArray + + source_array_config: ArrayConfig = attr.ib( + metadata={"help_text": "The Array that you want to copy and fill with ones."} + ) + + constant : int = attr.ib( + metadata={"help_text": "The constant value to fill the array with."}, + default=1 + ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py index 212d933ac..8fe9da6cc 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py @@ -600,52 +600,48 @@ def _neuroglancer_source(self): The _neuroglancer_source method is used to get the neuroglancer source of the array. The neuroglancer source is the source that is displayed in the neuroglancer viewer. """ + # source_arrays + if hassattr(self._source_array, "source_arrays"): + source_arrays = list(self._source_array.source_arrays) + # apply logical or + mask = np.logical_or.reduce(source_arrays) + return mask return self._source_array._neuroglancer_source() + def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: + """ + Combines dimensions and metadata from self._source_array._neuroglancer_source() + with data from self._neuroglancer_source(). + + Returns: + neuroglancer.LocalVolume: The combined neuroglancer source. + """ + source_array_volume = self._source_array._neuroglancer_source() + if isinstance(source_array_volume,list): + source_array_volume = source_array_volume[0] + result_data = self._neuroglancer_source() + + return neuroglancer.LocalVolume( + data=result_data, + dimensions=source_array_volume.dimensions, + voxel_offset=source_array_volume.voxel_offset, + ) + def _neuroglancer_layer(self): """ - Get the neuroglancer layer of the array + This method returns the neuroglancer layer for the source array. Returns: - Tuple[neuroglancer.Layer, dict]: The neuroglancer layer of the array + neuroglancer.SegmentationLayer: The neuroglancer layer for the source array. Raises: - ValueError: If the array is not writable + ValueError: If the source array is not writable. Examples: - >>> array_config = MergeInstancesArrayConfig( - ... name="logical_or", - ... source_array_configs=[ - ... ArrayConfig( - ... name="mask1", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask1", - ... mask_id=1, - ... ), - ... ), - ... ArrayConfig( - ... name="mask2", - ... array_type=MaskArray, - ... source_array_config=MaskArrayConfig( - ... name="mask2", - ... mask_id=2, - ... ), - ... ), - ... ], - ... ) - >>> array = array_config.create_array() - >>> array._neuroglancer_layer() - (SegmentationLayer(source='precomputed://https://mybucket.storage.googleapis.com/path/to/logical_or'), {'visible': False}) - Notes: - The _neuroglancer_layer method is used to get the neuroglancer layer of the array. - The neuroglancer layer is the layer that is displayed in the neuroglancer viewer. + >>> binarize_array._neuroglancer_layer() + Note: + This method is used to return the neuroglancer layer for the source array. """ - # Generates an Segmentation layer - - layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - kwargs = { - "visible": False, - } - return layer, kwargs + # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) + return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source()) def _source_name(self): """ @@ -684,4 +680,7 @@ def _source_name(self): The _source_name method is used to get the name of the source array. The name of the source array is the name of the array that is being modified. """ - return self._source_array._source_name() + name = self._source_array._source_name() + if isinstance(name, list): + name = "_".join(name) + return "logical_or"+name diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py index 6fd5c4faf..1e5889ff3 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py @@ -4,6 +4,9 @@ import numpy as np +import logging + +logger = logging.getLogger(__name__) class OnesArray(Array): """ @@ -402,4 +405,5 @@ def __getitem__(self, roi: Roi) -> np.ndarray: specified by the region of interest. This method returns a subarray of the array with all values set to 1. """ + logger.warning("OnesArray is deprecated. Use ConstantArray instead.") return np.ones_like(self.source_array.__getitem__(roi), dtype=bool) diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py index 5c60a5df4..ba6fd99f0 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py @@ -236,10 +236,11 @@ def data(self): Note: This method returns the data of the resampled array. """ - raise ValueError( - "Cannot get a writable view of this array because it is a virtual " - "array created by modifying another array on demand." - ) + return self._source_array.data + # raise ValueError( + # "Cannot get a writable view of this array because it is a virtual " + # "array created by modifying another array on demand." + # ) @property def scale(self): diff --git a/dacapo/experiments/datasplits/datasets/dataset.py b/dacapo/experiments/datasplits/datasets/dataset.py index ced4f58d6..5c70ea307 100644 --- a/dacapo/experiments/datasplits/datasets/dataset.py +++ b/dacapo/experiments/datasplits/datasets/dataset.py @@ -140,13 +140,25 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None): if ( self.gt is not None and self.gt._can_neuroglance() - and self.gt._source_name() not in exclude_layers ): - layers[self.gt._source_name()] = self.gt._neuroglancer_layer() + new_layers = self.gt._neuroglancer_layer() + if isinstance(new_layers, list): + names = self.gt._source_name() + for name, layer in zip(names, new_layers): + if name not in exclude_layers: + layers[name] = layer + elif self.gt._source_name() not in exclude_layers: + layers[self.gt._source_name()] = new_layers if ( self.mask is not None and self.mask._can_neuroglance() - and self.mask._source_name() not in exclude_layers ): - layers[self.mask._source_name()] = self.mask._neuroglancer_layer() + new_layers = self.mask._neuroglancer_layer() + if isinstance(new_layers, list): + names = self.mask._source_name() + for name, layer in zip(names, new_layers): + if name not in exclude_layers: + layers[name] = layer + elif self.gt._source_name() not in exclude_layers: + layers["mask_"+self.mask._source_name()] = new_layers return layers diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py index a1a0e45c3..7c5b0a331 100644 --- a/dacapo/experiments/datasplits/datasplit_generator.py +++ b/dacapo/experiments/datasplits/datasplit_generator.py @@ -13,6 +13,8 @@ BinarizeArrayConfig, IntensitiesArrayConfig, ConcatArrayConfig, + LogicalOrArrayConfig, + ConstantArrayConfig, ) from dacapo.experiments.datasplits import TrainValidateDataSplitConfig from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig @@ -477,6 +479,7 @@ def __init__( raw_min=0, raw_max=255, classes_separator_caracter="&", + use_negative_class=False, ): """ Initializes the DataSplitGenerator class with the specified: @@ -565,6 +568,12 @@ def __init__( self.raw_min = raw_min self.raw_max = raw_max self.classes_separator_caracter = classes_separator_caracter + self.use_negative_class = use_negative_class + if use_negative_class: + if targets is None: + raise ValueError( + "use_negative_class=True requires targets to be specified." + ) def __str__(self) -> str: """ @@ -712,13 +721,14 @@ def __generate_semantic_seg_datasplit(self): train_dataset_configs = [] validation_dataset_configs = [] for dataset in self.datasets: - raw_config, gt_config = self.__generate_semantic_seg_dataset_crop(dataset) + raw_config, gt_config, mask_config = self.__generate_semantic_seg_dataset_crop(dataset) if dataset.dataset_type == DatasetType.train: train_dataset_configs.append( RawGTDatasetConfig( name=f"{dataset}_{self.class_name}_{self.output_resolution[0]}nm", raw_config=raw_config, gt_config=gt_config, + mask_config=mask_config, ) ) else: @@ -727,6 +737,7 @@ def __generate_semantic_seg_datasplit(self): name=f"{dataset}_{self.class_name}_{self.output_resolution[0]}nm", raw_config=raw_config, gt_config=gt_config, + mask_config=mask_config, ) ) if type(self.class_name) == list: @@ -794,7 +805,10 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): max=self.raw_max, ) organelle_arrays = {} - classes_datasets, classes = self.check_class_name(gt_dataset) + # classes_datasets, classes = self.check_class_name(gt_dataset) + classes_datasets, classes = format_class_name( + gt_dataset, self.classes_separator_caracter + ) for current_class_dataset, current_class_name in zip(classes_datasets, classes): if not (gt_path / current_class_dataset).exists(): raise FileNotFoundError( @@ -815,26 +829,90 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): self.output_resolution, "gt", ) - gt_config = BinarizeArrayConfig( - f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_binarized", - source_array_config=gt_config, - groupings=[(current_class_name, [])], - ) + # gt_config = BinarizeArrayConfig( + # f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_binarized", + # source_array_config=gt_config, + # groupings=[(current_class_name, [])], + # ) organelle_arrays[current_class_name] = gt_config + if self.targets is None: targets_str = "_".join(classes) current_targets = classes else: current_targets = self.targets targets_str = "_".join(self.targets) - if len(organelle_arrays) > 1: + + target_images = {} + target_masks = {} + + + missing_classes = [c for c in current_targets if c not in classes] + found_classes = [c for c in current_targets if c in classes] + for t in found_classes: + target_images[t] = organelle_arrays[t] + + if len(missing_classes) > 0: + if not self.use_negative_class: + raise ValueError( + f"Missing classes found, {str(missing_classes)}, please specify use_negative_class=True to generate the missing classes." + ) + else: + if len(organelle_arrays) == 0: + raise ValueError( + f"No target classes found, please specify targets to generate the negative classes." + ) + # generate negative class + if len(organelle_arrays) > 1: + found_gt_config = ConcatArrayConfig( + name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt", + channels=list(organelle_arrays.keys()), + source_array_configs=organelle_arrays, + ) + missing_mask_config = LogicalOrArrayConfig( + name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_labelled_voxels", + source_array_config=found_gt_config, + ) + else: + missing_mask_config = list(organelle_arrays.values())[0] + missing_gt_config = ConstantArrayConfig( + name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt", + source_array_config=list(organelle_arrays.values())[0], + constant=0, + ) + for t in missing_classes: + target_images[t] = missing_gt_config + target_masks[t] = missing_mask_config + + for t in found_classes: + target_masks[t] = ConstantArrayConfig( + name=f"{dataset}_{t}_{self.output_resolution[0]}nm_labelled_voxels", + source_array_config=target_images[t], + constant=1, + ) + + + + + if len(target_images) > 1: gt_config = ConcatArrayConfig( name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_gt", channels=[organelle for organelle in current_targets], - source_array_configs={k: gt for k, gt in organelle_arrays.items()}, + # source_array_configs={k: gt for k, gt in target_images.items()}, + source_array_configs={k: target_images[k] for k in current_targets}, + ) + mask_config = ConcatArrayConfig( + name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_mask", + channels=[organelle for organelle in current_targets], + # source_array_configs={k: mask for k, mask in target_masks.items()}, + # to be sure to have the same order + source_array_configs={k: target_masks[k] for k in current_targets}, ) + else: + gt_config = list(target_images.values())[0] + mask_config = list(target_masks.values())[0] - return raw_config, gt_config + return raw_config, gt_config, mask_config # @staticmethod # def generate_csv(datasets: List[DatasetSpec], csv_path: Path): From 4973bf8ac86996836fecf1338376b9841bff91a7 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Thu, 13 Jun 2024 10:02:34 -0400 Subject: [PATCH 2/4] black format --- .../datasets/arrays/concat_array.py | 24 +++++++++++++++---- .../datasets/arrays/constant_array.py | 15 ++++++++---- .../datasets/arrays/constant_array_config.py | 5 ++-- .../datasets/arrays/logical_or_array.py | 10 ++++---- .../datasplits/datasets/arrays/ones_array.py | 1 + .../datasplits/datasets/dataset.py | 12 +++------- .../datasplits/datasplit_generator.py | 22 ++++++++--------- 7 files changed, 52 insertions(+), 37 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index dec570731..c2ef40969 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -473,7 +473,12 @@ def _can_neuroglance(self): Note: This method is used to return True if the source array can be visualized in neuroglance. """ - return any([source_array._can_neuroglance() for source_array in self.source_arrays.values()]) + return any( + [ + source_array._can_neuroglance() + for source_array in self.source_arrays.values() + ] + ) def _neuroglancer_source(self): """ @@ -489,7 +494,10 @@ def _neuroglancer_source(self): This method is used to return the source array for neuroglancer. """ # return self._source_array._neuroglancer_source() - return [source_array._neuroglancer_source() for source_array in self.source_arrays.values()] + return [ + source_array._neuroglancer_source() + for source_array in self.source_arrays.values() + ] def _neuroglancer_layer(self): """ @@ -505,7 +513,11 @@ def _neuroglancer_layer(self): This method is used to return the neuroglancer layer for the source array. """ # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return [source_array._neuroglancer_layer() for source_array in self.source_arrays.values() if source_array._can_neuroglance()] + return [ + source_array._neuroglancer_layer() + for source_array in self.source_arrays.values() + if source_array._can_neuroglance() + ] def _source_name(self): """ @@ -521,4 +533,8 @@ def _source_name(self): This method is used to return the name of the source array. """ # return self._source_array._source_name() - return [source_array._source_name() for source_array in self.source_arrays.values() if source_array._can_neuroglance()] + return [ + source_array._source_name() + for source_array in self.source_arrays.values() + if source_array._can_neuroglance() + ] diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py index c5cb77eea..b76d5bd32 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py @@ -404,7 +404,10 @@ def __getitem__(self, roi: Roi) -> np.ndarray: specified by the region of interest. This method returns a subarray of the array with all values set to 1. """ - return np.ones_like(self.source_array.__getitem__(roi), dtype=bool) * self._constant + return ( + np.ones_like(self.source_array.__getitem__(roi), dtype=bool) + * self._constant + ) def _can_neuroglance(self): """ @@ -436,7 +439,7 @@ def _neuroglancer_source(self): """ # return self._source_array._neuroglancer_source() return np.ones_like(self.source_array.data, dtype=np.uint64) * self._constant - + def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: """ Combines dimensions and metadata from self._source_array._neuroglancer_source() @@ -447,7 +450,7 @@ def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: """ source_array_volume = self._source_array._neuroglancer_source() result_data = self._neuroglancer_source() - + return neuroglancer.LocalVolume( data=result_data, dimensions=source_array_volume.dimensions, @@ -468,7 +471,9 @@ def _neuroglancer_layer(self): This method is used to return the neuroglancer layer for the source array. """ # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source()) + return neuroglancer.SegmentationLayer( + source=self._combined_neuroglancer_source() + ) def _source_name(self): """ @@ -484,4 +489,4 @@ def _source_name(self): This method is used to return the name of the source array. """ # return self._source_array._source_name() - return f"{self._constant}_of_{self.source_array._source_name()}" \ No newline at end of file + return f"{self._constant}_of_{self.source_array._source_name()}" diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py index 9c8521d42..47c2b8689 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py @@ -27,7 +27,6 @@ class ConstantArrayConfig(ArrayConfig): metadata={"help_text": "The Array that you want to copy and fill with ones."} ) - constant : int = attr.ib( - metadata={"help_text": "The constant value to fill the array with."}, - default=1 + constant: int = attr.ib( + metadata={"help_text": "The constant value to fill the array with."}, default=1 ) diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py index 8fe9da6cc..580f54d63 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py @@ -617,10 +617,10 @@ def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume: neuroglancer.LocalVolume: The combined neuroglancer source. """ source_array_volume = self._source_array._neuroglancer_source() - if isinstance(source_array_volume,list): + if isinstance(source_array_volume, list): source_array_volume = source_array_volume[0] result_data = self._neuroglancer_source() - + return neuroglancer.LocalVolume( data=result_data, dimensions=source_array_volume.dimensions, @@ -641,7 +641,9 @@ def _neuroglancer_layer(self): This method is used to return the neuroglancer layer for the source array. """ # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source()) - return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source()) + return neuroglancer.SegmentationLayer( + source=self._combined_neuroglancer_source() + ) def _source_name(self): """ @@ -683,4 +685,4 @@ def _source_name(self): name = self._source_array._source_name() if isinstance(name, list): name = "_".join(name) - return "logical_or"+name + return "logical_or" + name diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py index 1e5889ff3..16e2d76ec 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) + class OnesArray(Array): """ This is a wrapper around another `source_array` that simply provides ones diff --git a/dacapo/experiments/datasplits/datasets/dataset.py b/dacapo/experiments/datasplits/datasets/dataset.py index 5c70ea307..d3591b447 100644 --- a/dacapo/experiments/datasplits/datasets/dataset.py +++ b/dacapo/experiments/datasplits/datasets/dataset.py @@ -137,10 +137,7 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None): and self.raw._source_name() not in exclude_layers ): layers[self.raw._source_name()] = self.raw._neuroglancer_layer() - if ( - self.gt is not None - and self.gt._can_neuroglance() - ): + if self.gt is not None and self.gt._can_neuroglance(): new_layers = self.gt._neuroglancer_layer() if isinstance(new_layers, list): names = self.gt._source_name() @@ -149,10 +146,7 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None): layers[name] = layer elif self.gt._source_name() not in exclude_layers: layers[self.gt._source_name()] = new_layers - if ( - self.mask is not None - and self.mask._can_neuroglance() - ): + if self.mask is not None and self.mask._can_neuroglance(): new_layers = self.mask._neuroglancer_layer() if isinstance(new_layers, list): names = self.mask._source_name() @@ -160,5 +154,5 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None): if name not in exclude_layers: layers[name] = layer elif self.gt._source_name() not in exclude_layers: - layers["mask_"+self.mask._source_name()] = new_layers + layers["mask_" + self.mask._source_name()] = new_layers return layers diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py index 7c5b0a331..74e067546 100644 --- a/dacapo/experiments/datasplits/datasplit_generator.py +++ b/dacapo/experiments/datasplits/datasplit_generator.py @@ -721,7 +721,9 @@ def __generate_semantic_seg_datasplit(self): train_dataset_configs = [] validation_dataset_configs = [] for dataset in self.datasets: - raw_config, gt_config, mask_config = self.__generate_semantic_seg_dataset_crop(dataset) + raw_config, gt_config, mask_config = ( + self.__generate_semantic_seg_dataset_crop(dataset) + ) if dataset.dataset_type == DatasetType.train: train_dataset_configs.append( RawGTDatasetConfig( @@ -835,23 +837,22 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): # groupings=[(current_class_name, [])], # ) organelle_arrays[current_class_name] = gt_config - + if self.targets is None: targets_str = "_".join(classes) current_targets = classes else: current_targets = self.targets targets_str = "_".join(self.targets) - + target_images = {} target_masks = {} - missing_classes = [c for c in current_targets if c not in classes] found_classes = [c for c in current_targets if c in classes] for t in found_classes: target_images[t] = organelle_arrays[t] - + if len(missing_classes) > 0: if not self.use_negative_class: raise ValueError( @@ -865,9 +866,9 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): # generate negative class if len(organelle_arrays) > 1: found_gt_config = ConcatArrayConfig( - name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt", - channels=list(organelle_arrays.keys()), - source_array_configs=organelle_arrays, + name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt", + channels=list(organelle_arrays.keys()), + source_array_configs=organelle_arrays, ) missing_mask_config = LogicalOrArrayConfig( name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_labelled_voxels", @@ -883,7 +884,7 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): for t in missing_classes: target_images[t] = missing_gt_config target_masks[t] = missing_mask_config - + for t in found_classes: target_masks[t] = ConstantArrayConfig( name=f"{dataset}_{t}_{self.output_resolution[0]}nm_labelled_voxels", @@ -891,9 +892,6 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec): constant=1, ) - - - if len(target_images) > 1: gt_config = ConcatArrayConfig( name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_gt", From 73bddd8f10e7cf5c2445f9fd9b533eba9d71149a Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Mon, 17 Jun 2024 18:41:58 -0400 Subject: [PATCH 3/4] fix hot_distance --- dacapo/experiments/tasks/predictors/hot_distance_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py index c25df23ec..9b067f230 100644 --- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py @@ -188,7 +188,7 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): self.dt_scale_factor, ) else: - distance_mask = np.ones_like(target.data) + distance_mask = np.ones_like(gt.data) distance_weights, distance_moving_class_counts = balance_weights( gt[target.roi], From 3b4a5df93ffce6d6801f82248c9ca8342b4cc400 Mon Sep 17 00:00:00 2001 From: Marwan Zouinkhi Date: Thu, 20 Jun 2024 10:49:47 -0400 Subject: [PATCH 4/4] fix error run name --- dacapo/experiments/run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py index 55821a6c0..c7b8c8f7d 100644 --- a/dacapo/experiments/run.py +++ b/dacapo/experiments/run.py @@ -212,3 +212,6 @@ def move_optimizer( state[k] = v.to(device) if empty_cuda_cache: torch.cuda.empty_cache() + + def __str__(self): + return self.name