From 4d6fa632530c7b6d95af7b020cab592fc6db0008 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Thu, 13 Jun 2024 10:01:24 -0400
Subject: [PATCH 1/4] use negative crops + better neuroglancer

---
 .../datasplits/datasets/arrays/__init__.py    |   1 +
 .../datasets/arrays/concat_array.py           |  63 +++
 .../datasets/arrays/constant_array.py         | 487 ++++++++++++++++++
 .../datasets/arrays/constant_array_config.py  |  33 ++
 .../datasets/arrays/logical_or_array.py       |  75 ++-
 .../datasplits/datasets/arrays/ones_array.py  |   4 +
 .../datasets/arrays/resampled_array.py        |   9 +-
 .../datasplits/datasets/dataset.py            |  20 +-
 .../datasplits/datasplit_generator.py         |  98 +++-
 9 files changed, 734 insertions(+), 56 deletions(-)
 create mode 100644 dacapo/experiments/datasplits/datasets/arrays/constant_array.py
 create mode 100644 dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py

diff --git a/dacapo/experiments/datasplits/datasets/arrays/__init__.py b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
index 63d6d6e21..74091aba0 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/__init__.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/__init__.py
@@ -22,3 +22,4 @@
 
 # nonconfigurable arrays (helpers)
 from .numpy_array import NumpyArray  # noqa
+from .constant_array_config import ConstantArray, ConstantArrayConfig  # noqa
diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
index 2cea77a00..dec570731 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
@@ -459,3 +459,66 @@ def __getitem__(self, roi: Roi) -> np.ndarray:
                 f"Concatenated array has only one channel: {self.name} {concatenated.shape}"
             )
         return concatenated
+
+    def _can_neuroglance(self):
+        """
+        Report whether at least one concatenated source can be shown in neuroglancer.
+
+        Returns:
+            bool: True if any array in `self.source_arrays` reports that it
+                can be visualized, False otherwise (including when empty).
+        Examples:
+            >>> concat_array._can_neuroglance()
+        Note:
+            A generator is passed to `any` so the check stops at the first
+            source that can be visualized instead of querying all of them.
+        """
+        return any(source._can_neuroglance() for source in self.source_arrays.values())
+
+    def _neuroglancer_source(self):
+        """
+        Collect the neuroglancer sources of the concatenated source arrays.
+
+        Returns:
+            list: One entry per array in `self.source_arrays` that reports
+                `_can_neuroglance()`, in the mapping's iteration order.
+        Examples:
+            >>> concat_array._neuroglancer_source()
+        Note:
+            Sources that cannot be visualized are skipped so this list stays
+            aligned with `_neuroglancer_layer()` and `_source_name()`.
+        """
+        sources = self.source_arrays.values()
+        return [source._neuroglancer_source() for source in sources if source._can_neuroglance()]
+
+    def _neuroglancer_layer(self):
+        """
+        Build the neuroglancer layers of the visualizable source arrays.
+
+        Returns:
+            list: The `_neuroglancer_layer()` result of every array in
+                `self.source_arrays` whose `_can_neuroglance()` is truthy.
+        Examples:
+            >>> concat_array._neuroglancer_layer()
+        """
+        layers = []
+        for candidate in self.source_arrays.values():
+            if candidate._can_neuroglance():
+                layers.append(candidate._neuroglancer_layer())
+        return layers
+
+    def _source_name(self):
+        """
+        List the names of the visualizable source arrays.
+
+        Returns:
+            list: The `_source_name()` result of every array in
+                `self.source_arrays` whose `_can_neuroglance()` is truthy.
+        Examples:
+            >>> concat_array._source_name()
+        """
+        names = []
+        for candidate in self.source_arrays.values():
+            if candidate._can_neuroglance():
+                names.append(candidate._source_name())
+        return names
diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py
new file mode 100644
index 000000000..c5cb77eea
--- /dev/null
+++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py
@@ -0,0 +1,487 @@
+from .array import Array
+
+from funlib.geometry import Roi
+
+import numpy as np
+import neuroglancer
+
+
+class ConstantArray(Array):
+    """
+    This is a wrapper around another `source_array` that simply provides a constant
+    value with the same metadata as the `source_array`.
+
+    This is useful for creating a mask array that is the same size as the
+    original array, but with every value set to the configured constant.
+
+    Attributes:
+        source_array: The source array that this array is based on.
+    Methods:
+        like: Create a new ConstantArray with the same metadata as another array.
+        attrs: Get the attributes of the array.
+        axes: Get the axes of the array.
+        dims: Get the dimensions of the array.
+        voxel_size: Get the voxel size of the array.
+        roi: Get the region of interest of the array.
+        writable: Check if the array is writable.
+        data: Get the data of the array.
+        dtype: Get the data type of the array.
+        num_channels: Get the number of channels of the array.
+        __getitem__: Get a subarray of the array.
+    Note:
+        This class is not meant to be instantiated directly. Instead, use the
+        `like` method to create a new ConstantArray with the same metadata as
+        another array.
+    """
+
+    def __init__(self, array_config):
+        """
+        Build the wrapped source array and store the fill constant.
+
+        The source array is not passed in directly. It is instantiated here by
+        calling the `array_type` of the nested source config with that same
+        config object as its only argument.
+
+        Args:
+            array_config: Configuration object that provides two attributes:
+
+                * `source_array_config`: the config of the array to wrap. Its
+                  `array_type` is called with the config itself.
+                * `constant`: the value stored as `self._constant`, which
+                  `__getitem__` multiplies into an all-ones array and which
+                  `_source_name` embeds in the reported name.
+
+        Raises:
+            AttributeError: If `array_config` lacks `source_array_config` or
+                `constant`, or if the source config lacks `array_type`.
+
+        Examples:
+            >>> # `config` is a ConstantArrayConfig wrapping another array config
+            >>> constant_array = ConstantArray(config)
+            >>> constant_array.source_array is not None
+            True
+
+        Notes:
+            To wrap an array object that already exists, use the `like` class
+            method instead; it skips this constructor entirely and therefore
+            needs no configuration object.
+        """
+        source_config = array_config.source_array_config
+        # Each array config names the class that is instantiated from it.
+        build_source = source_config.array_type
+        self._source_array = build_source(source_config)
+        # Kept on the instance so the constant is available to later reads.
+        self._constant = array_config.constant
+
+    @classmethod
+    def like(cls, array: Array, constant: int = 1):
+        """
+        Create a new ConstantArray that wraps an existing array object.
+
+        The instance is created without running `__init__`, so no array
+        configuration is needed; the given array is used as the source as is.
+        This is the way to build a ConstantArray from an array object that was
+        created elsewhere, for example a mask matching an existing volume.
+
+        Args:
+            array: The source array whose metadata the new array mirrors.
+            constant: The value the new array is filled with. Defaults to 1,
+                the same default that `ConstantArrayConfig.constant` uses.
+
+        Returns:
+            The new ConstantArray with the same metadata as the source array.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> ones = ConstantArray.like(source)
+            >>> ones.source_array is source
+            True
+            >>> zeros = ConstantArray.like(source, constant=0)
+            >>> zeros._source_name() == f"0_of_{source._source_name()}"
+            True
+
+        Notes:
+            `_constant` has to be assigned here because `__init__` is bypassed.
+            Without it, `__getitem__`, `_neuroglancer_source` and `_source_name`
+            raise AttributeError on instances created through `like`.
+        """
+        instance = cls.__new__(cls)
+        instance._source_array = array
+        # __init__ never runs for this instance, so set the constant explicitly.
+        instance._constant = constant
+        return instance
+
+    @property
+    def attrs(self):
+        """
+        Attributes of the array, which are always empty.
+
+        Returns:
+            A new, empty dictionary on every access.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.attrs
+            {}
+
+        Notes:
+            Nothing is read from the source array here: its attributes are
+            not forwarded, and changes made to the returned dictionary are
+            not stored on the constant array.
+        """
+        empty_attrs = {}
+        return empty_attrs
+
+    @property
+    def source_array(self) -> Array:
+        """
+        The array that this constant array wraps.
+
+        The metadata properties of the constant array (`axes`, `dims`,
+        `voxel_size`, `roi` and `num_channels`) are read from this object, and
+        `__getitem__` reads it to learn the shape of the data it returns.
+
+        Returns:
+            The object stored in `self._source_array`. It is assigned either by
+            `__init__`, which builds it from the source array config, or by
+            `like`, which stores the array it was given.
+
+        Raises:
+            AttributeError: If `_source_array` has not been assigned on this
+                instance.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.source_array is source
+            True
+
+        Notes:
+            The property is read-only because no setter is defined. The wrapped
+            array itself is returned, not a copy, so it is shared with every
+            other holder of the same array object. Changes made to the source
+            array are therefore visible through the constant array's metadata
+            properties.
+        """
+        wrapped = self._source_array
+        return wrapped
+
+    @property
+    def axes(self):
+        """
+        Axes of the constant array, taken from the source array.
+
+        Returns:
+            The value of `self.source_array.axes`, forwarded unchanged.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.axes == source.axes
+            True
+
+        Notes:
+            The constant array stores no axis information of its own.
+            Every access reads the property from the wrapped source array
+            again, so the result reflects the source array as it is at
+            the time of the call.
+        """
+        wrapped = self.source_array
+        # Pure delegation: no caching and no conversion of the value.
+        return wrapped.axes
+
+    @property
+    def dims(self):
+        """
+        The `dims` of the constant array, taken from the source array.
+
+        Returns:
+            The value of `self.source_array.dims`, forwarded unchanged.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.dims == source.dims
+            True
+
+        Notes:
+            The constant array stores no dimension information of its own.
+            Every access reads the property from the wrapped source array
+            again, so the result reflects the source array as it is at
+            the time of the call.
+        """
+        wrapped = self.source_array
+        # Pure delegation: no caching and no conversion of the value.
+        return wrapped.dims
+
+    @property
+    def voxel_size(self):
+        """
+        Voxel size of the constant array, taken from the source array.
+
+        Returns:
+            The value of `self.source_array.voxel_size`, forwarded unchanged.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.voxel_size == source.voxel_size
+            True
+
+        Notes:
+            The constant array stores no voxel size of its own.
+            Every access reads the property from the wrapped source array
+            again, so the result reflects the source array as it is at
+            the time of the call.
+        """
+        wrapped = self.source_array
+        # Pure delegation: no caching and no conversion of the value.
+        return wrapped.voxel_size
+
+    @property
+    def roi(self):
+        """
+        Region of interest of the constant array, taken from the source array.
+
+        Returns:
+            The value of `self.source_array.roi`, forwarded unchanged; this
+            property does not copy or convert it.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.roi == source.roi
+            True
+
+        Notes:
+            The constant array stores no region of interest of its own.
+            Every access reads the property from the wrapped source array
+            again, so the result reflects the source array as it is at
+            the time of the call. `__getitem__` likewise hands the regions
+            it receives to the source array without modifying them.
+        """
+        wrapped = self.source_array
+        # Pure delegation: no caching and no conversion of the value.
+        return wrapped.roi
+
+    @property
+    def writable(self) -> bool:
+        """
+        Whether data can be written to this array, which is never the case.
+
+        Returns:
+            Always False. The source array is not consulted, so the answer
+            does not depend on whether the source array is writable.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.writable
+            False
+
+        Notes:
+            The values are produced on demand by `__getitem__` and no data
+            buffer is kept on the instance: `__init__` stores only the
+            source array and the constant. There is therefore nothing to
+            write to, and the `data` property raises RuntimeError instead
+            of returning a buffer.
+        """
+        return False
+
+    @property
+    def data(self):
+        """
+        Unsupported: a constant array has no backing data buffer.
+
+        The values of a constant array are generated on demand by
+        `__getitem__`, so this property never returns anything and always
+        raises instead.
+
+        Returns:
+            Never returns.
+
+        Raises:
+            RuntimeError: Always, with the message
+                "Cannot get writable version of this data!".
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.data
+            Traceback (most recent call last):
+                ...
+            RuntimeError: Cannot get writable version of this data!
+
+        Notes:
+            Use `constant_array[roi]` to obtain the constant values for a
+            region of interest. `_neuroglancer_source` reads the `data` of
+            the source array, not this property, so it is not affected by
+            this error; it does require the source array to provide
+            `data`.
+        """
+        raise RuntimeError("Cannot get writable version of this data!")
+
+    @property
+    def dtype(self):
+        """
+        Data type reported for the array.
+
+        The value is fixed in the code; nothing is read from the source array.
+
+        Returns:
+            The Python builtin `bool`, regardless of the source array's
+            data type and of the configured constant.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.dtype
+            <class 'bool'>
+
+        Notes:
+            NOTE(review): `__getitem__` multiplies a boolean array by
+            `self._constant`. With an integer constant NumPy promotes the
+            product to an integer type, so the returned data may not have
+            the dtype reported here - confirm which of the two is intended.
+        """
+        return bool
+
+    @property
+    def num_channels(self):
+        """
+        Number of channels of the constant array, taken from the source array.
+
+        Returns:
+            The value of `self.source_array.num_channels`, forwarded
+            unchanged.
+
+        Examples:
+            >>> # `source` is any existing Array instance
+            >>> constant_array = ConstantArray.like(source)
+            >>> constant_array.num_channels == source.num_channels
+            True
+
+        Notes:
+            The constant array stores no channel count of its own.
+            Every access reads the property from the wrapped source array
+            again, so the result reflects the source array as it is at
+            the time of the call.
+        """
+        wrapped = self.source_array
+        # Pure delegation: no caching and no conversion of the value.
+        return wrapped.num_channels
+
+    def __getitem__(self, roi: Roi) -> np.ndarray:
+        """
+        Get the constant values for a region of interest.
+
+        The source array is read for the same region, but only to learn the
+        shape of the result; its values are discarded.
+
+        Args:
+            roi: The region of interest, passed unchanged to the source array.
+
+        Returns:
+            An array with the shape of `self.source_array[roi]` in which every
+            element equals `self._constant`.
+
+        Examples:
+            >>> # `constant_array` was built from a config with constant=0
+            >>> block = constant_array[constant_array.roi]
+            >>> block.shape == constant_array.source_array[constant_array.roi].shape
+            True
+            >>> bool((block == 0).all())
+            True
+
+        Notes:
+            The source array is queried on every call; nothing is cached.
+            NOTE(review): the all-ones array is boolean, but multiplying it by
+            an integer constant makes NumPy promote the result to an integer
+            type, while the `dtype` property reports `bool` - confirm which
+            one is intended.
+        """
+        source_block = self.source_array.__getitem__(roi)
+        # Only the shape of the source block is used; the dtype is forced to bool.
+        ones = np.ones_like(source_block, dtype=bool)
+        return ones * self._constant
+
+    def _can_neuroglance(self):
+        """
+        Report that a constant array can always be shown in neuroglancer.
+
+        Returns:
+            bool: Always True; the source array is not consulted.
+        Examples:
+            >>> constant_array._can_neuroglance()
+            True
+        Note:
+            Building the layer still calls `_neuroglancer_source()` on the
+            source array, which this check does not verify.
+        """
+        return True
+
+    def _neuroglancer_source(self):
+        """
+        Produce the constant-valued data shown for this array in neuroglancer.
+
+        Returns:
+            numpy.ndarray: Array shaped like `self.source_array.data`, created
+                as uint64 ones and multiplied by `self._constant`.
+        Note:
+            The whole `data` of the source array is read to obtain the shape.
+            The result is raw data, not a volume; the volume is assembled by
+            `_combined_neuroglancer_source`.
+        """
+        source_data = self.source_array.data
+        ones = np.ones_like(source_data, dtype=np.uint64)
+        return ones * self._constant
+    
+    def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume:
+        """
+        Wrap the constant data in a volume placed like the source array's volume.
+
+        Returns:
+            neuroglancer.LocalVolume: Volume whose data comes from
+                `self._neuroglancer_source()` and whose `dimensions` and
+                `voxel_offset` are copied from the source array's volume.
+        """
+        reference = self._source_array._neuroglancer_source()
+        constant_data = self._neuroglancer_source()
+        placement = {
+            "dimensions": reference.dimensions,
+            "voxel_offset": reference.voxel_offset,
+        }
+        return neuroglancer.LocalVolume(data=constant_data, **placement)
+
+    def _neuroglancer_layer(self):
+        """
+        Create the neuroglancer layer that displays this constant array.
+
+        Returns:
+            neuroglancer.SegmentationLayer: Layer whose source is the volume
+                built by `_combined_neuroglancer_source`.
+        Examples:
+            >>> layer = constant_array._neuroglancer_layer()
+        Note:
+            A new volume and a new layer are created on every call.
+        """
+        volume = self._combined_neuroglancer_source()
+        layer = neuroglancer.SegmentationLayer(source=volume)
+        return layer
+
+    def _source_name(self):
+        """
+        Derive a name from the constant and the source array's name.
+
+        Returns:
+            str: The constant, the text `_of_` and the result of the source
+                array's `_source_name()`, formatted in that order. Values that
+                are not strings are converted by the f-string formatting.
+        Examples:
+            >>> # with a constant of 0 and a source array named "raw"
+            >>> constant_array._source_name()
+            '0_of_raw'
+        """
+        label = f"{self._constant}_of_{self.source_array._source_name()}"
+        return label
\ No newline at end of file
diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py
new file mode 100644
index 000000000..9c8521d42
--- /dev/null
+++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py
@@ -0,0 +1,33 @@
+import attr
+
+from .array_config import ArrayConfig
+from .constant_array import ConstantArray
+
+
+@attr.s
+class ConstantArrayConfig(ArrayConfig):
+    """
+    Configuration for a `ConstantArray`: an array that takes its metadata from
+    a source array and returns one constant value everywhere.
+
+    This is useful for creating a mask array from a source array. For example, if you have a
+    2D array of data and you want to create a mask array that is the same shape as the data
+    array, you can use this class to create the mask array.
+
+    Attributes:
+        source_array_config: The config of the source array whose metadata is copied.
+        constant: The value that fills the array. Defaults to 1.
+    Note:
+        This class is a subclass of ArrayConfig.
+    """
+
+    array_type = ConstantArray
+
+    source_array_config: ArrayConfig = attr.ib(
+        metadata={"help_text": "The Array that you want to copy and fill with a constant."}
+    )
+
+    constant: int = attr.ib(
+        metadata={"help_text": "The constant value to fill the array with."},
+        default=1,
+    )
diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
index 212d933ac..8fe9da6cc 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
@@ -600,52 +600,48 @@ def _neuroglancer_source(self):
             The _neuroglancer_source method is used to get the neuroglancer source of the array.
             The neuroglancer source is the source that is displayed in the neuroglancer viewer.
         """
+        # NOTE(review): ConcatArray reads source_arrays via .values(); list() of a dict yields keys - confirm.
+        if hasattr(self._source_array, "source_arrays"):
+            source_arrays = list(self._source_array.source_arrays)
+            # apply logical or
+            mask = np.logical_or.reduce(source_arrays)
+            return mask
         return self._source_array._neuroglancer_source()
 
+    def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume:
+        """
+        Wrap this array's neuroglancer data in a volume placed like the source.
+
+        Returns:
+            neuroglancer.LocalVolume: Volume whose data comes from
+                `self._neuroglancer_source()` and whose `dimensions` and
+                `voxel_offset` are copied from the source array's volume.
+        """
+        reference = self._source_array._neuroglancer_source()
+        # A list of volumes is represented by its first entry.
+        reference = reference[0] if isinstance(reference, list) else reference
+        mask_data = self._neuroglancer_source()
+        placement = {
+            "dimensions": reference.dimensions,
+            "voxel_offset": reference.voxel_offset,
+        }
+        return neuroglancer.LocalVolume(data=mask_data, **placement)
+
     def _neuroglancer_layer(self):
         """
-        Get the neuroglancer layer of the array
+        This method returns the neuroglancer layer for the source array.
 
         Returns:
-            Tuple[neuroglancer.Layer, dict]: The neuroglancer layer of the array
+            neuroglancer.SegmentationLayer: The neuroglancer layer for the source array.
         Raises:
-            ValueError: If the array is not writable
+            ValueError: If the source array is not writable.
         Examples:
-            >>> array_config = MergeInstancesArrayConfig(
-            ...     name="logical_or",
-            ...     source_array_configs=[
-            ...         ArrayConfig(
-            ...             name="mask1",
-            ...             array_type=MaskArray,
-            ...             source_array_config=MaskArrayConfig(
-            ...                 name="mask1",
-            ...                 mask_id=1,
-            ...             ),
-            ...         ),
-            ...         ArrayConfig(
-            ...             name="mask2",
-            ...             array_type=MaskArray,
-            ...             source_array_config=MaskArrayConfig(
-            ...                 name="mask2",
-            ...                 mask_id=2,
-            ...             ),
-            ...         ),
-            ...     ],
-            ... )
-            >>> array = array_config.create_array()
-            >>> array._neuroglancer_layer()
-            (SegmentationLayer(source='precomputed://https://mybucket.storage.googleapis.com/path/to/logical_or'), {'visible': False})
-        Notes:
-            The _neuroglancer_layer method is used to get the neuroglancer layer of the array.
-            The neuroglancer layer is the layer that is displayed in the neuroglancer viewer.
+            >>> binarize_array._neuroglancer_layer()
+        Note:
+            This method is used to return the neuroglancer layer for the source array.
         """
-        # Generates an Segmentation layer
-
-        layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source())
-        kwargs = {
-            "visible": False,
-        }
-        return layer, kwargs
+        # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source())
+        return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source())
 
     def _source_name(self):
         """
@@ -684,4 +680,7 @@ def _source_name(self):
             The _source_name method is used to get the name of the source array. The name
             of the source array is the name of the array that is being modified.
         """
-        return self._source_array._source_name()
+        name = self._source_array._source_name()
+        if isinstance(name, list):
+            name = "_".join(name)
+        return "logical_or"+name
diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
index 6fd5c4faf..1e5889ff3 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
@@ -4,6 +4,9 @@
 
 import numpy as np
 
+import logging
+
+logger = logging.getLogger(__name__)
 
 class OnesArray(Array):
     """
@@ -402,4 +405,5 @@ def __getitem__(self, roi: Roi) -> np.ndarray:
             specified by the region of interest. This method returns a subarray
             of the array with all values set to 1.
         """
+        logger.warning("OnesArray is deprecated. Use ConstantArray instead.")
         return np.ones_like(self.source_array.__getitem__(roi), dtype=bool)
diff --git a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py b/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py
index 5c60a5df4..ba6fd99f0 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/resampled_array.py
@@ -236,10 +236,9 @@ def data(self):
         Note:
             This method returns the data of the resampled array.
         """
-        raise ValueError(
-            "Cannot get a writable view of this array because it is a virtual "
-            "array created by modifying another array on demand."
-        )
+        # NOTE(review): this returns the source array's data unchanged (nothing is
+        # resampled here) - confirm callers do not rely on this array's voxel size.
+        return self._source_array.data
 
     @property
     def scale(self):
diff --git a/dacapo/experiments/datasplits/datasets/dataset.py b/dacapo/experiments/datasplits/datasets/dataset.py
index ced4f58d6..5c70ea307 100644
--- a/dacapo/experiments/datasplits/datasets/dataset.py
+++ b/dacapo/experiments/datasplits/datasets/dataset.py
@@ -140,13 +140,25 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None):
         if (
             self.gt is not None
             and self.gt._can_neuroglance()
-            and self.gt._source_name() not in exclude_layers
         ):
-            layers[self.gt._source_name()] = self.gt._neuroglancer_layer()
+            new_layers = self.gt._neuroglancer_layer()
+            if isinstance(new_layers, list):
+                names = self.gt._source_name()
+                for name, layer in zip(names, new_layers):
+                    if name not in exclude_layers:
+                        layers[name] = layer
+            elif self.gt._source_name() not in exclude_layers:
+                layers[self.gt._source_name()] = new_layers
         if (
             self.mask is not None
             and self.mask._can_neuroglance()
-            and self.mask._source_name() not in exclude_layers
         ):
-            layers[self.mask._source_name()] = self.mask._neuroglancer_layer()
+            new_layers = self.mask._neuroglancer_layer()
+            if isinstance(new_layers, list):
+                names = self.mask._source_name()
+                for name, layer in zip(names, new_layers):
+                    if name not in exclude_layers:
+                        layers[name] = layer
+            elif self.gt._source_name() not in exclude_layers:
+                layers["mask_"+self.mask._source_name()] = new_layers
         return layers
diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py
index a1a0e45c3..7c5b0a331 100644
--- a/dacapo/experiments/datasplits/datasplit_generator.py
+++ b/dacapo/experiments/datasplits/datasplit_generator.py
@@ -13,6 +13,8 @@
     BinarizeArrayConfig,
     IntensitiesArrayConfig,
     ConcatArrayConfig,
+    LogicalOrArrayConfig,
+    ConstantArrayConfig,
 )
 from dacapo.experiments.datasplits import TrainValidateDataSplitConfig
 from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig
@@ -477,6 +479,7 @@ def __init__(
         raw_min=0,
         raw_max=255,
         classes_separator_caracter="&",
+        use_negative_class=False,
     ):
         """
         Initializes the DataSplitGenerator class with the specified:
@@ -565,6 +568,12 @@ def __init__(
         self.raw_min = raw_min
         self.raw_max = raw_max
         self.classes_separator_caracter = classes_separator_caracter
+        self.use_negative_class = use_negative_class
+        if use_negative_class:
+            if targets is None:
+                raise ValueError(
+                    "use_negative_class=True requires targets to be specified."
+                )
 
     def __str__(self) -> str:
         """
@@ -712,13 +721,14 @@ def __generate_semantic_seg_datasplit(self):
         train_dataset_configs = []
         validation_dataset_configs = []
         for dataset in self.datasets:
-            raw_config, gt_config = self.__generate_semantic_seg_dataset_crop(dataset)
+            raw_config, gt_config, mask_config = self.__generate_semantic_seg_dataset_crop(dataset)
             if dataset.dataset_type == DatasetType.train:
                 train_dataset_configs.append(
                     RawGTDatasetConfig(
                         name=f"{dataset}_{self.class_name}_{self.output_resolution[0]}nm",
                         raw_config=raw_config,
                         gt_config=gt_config,
+                        mask_config=mask_config,
                     )
                 )
             else:
@@ -727,6 +737,7 @@ def __generate_semantic_seg_datasplit(self):
                         name=f"{dataset}_{self.class_name}_{self.output_resolution[0]}nm",
                         raw_config=raw_config,
                         gt_config=gt_config,
+                        mask_config=mask_config,
                     )
                 )
         if type(self.class_name) == list:
@@ -794,7 +805,10 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
             max=self.raw_max,
         )
         organelle_arrays = {}
-        classes_datasets, classes = self.check_class_name(gt_dataset)
+        # classes_datasets, classes = self.check_class_name(gt_dataset)
+        classes_datasets, classes = format_class_name(
+            gt_dataset, self.classes_separator_caracter
+        )
         for current_class_dataset, current_class_name in zip(classes_datasets, classes):
             if not (gt_path / current_class_dataset).exists():
                 raise FileNotFoundError(
@@ -815,26 +829,90 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
                     self.output_resolution,
                     "gt",
                 )
-            gt_config = BinarizeArrayConfig(
-                f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_binarized",
-                source_array_config=gt_config,
-                groupings=[(current_class_name, [])],
-            )
+            # gt_config = BinarizeArrayConfig(
+            #     f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_binarized",
+            #     source_array_config=gt_config,
+            #     groupings=[(current_class_name, [])],
+            # )
             organelle_arrays[current_class_name] = gt_config
+        
         if self.targets is None:
             targets_str = "_".join(classes)
             current_targets = classes
         else:
             current_targets = self.targets
             targets_str = "_".join(self.targets)
-        if len(organelle_arrays) > 1:
+        
+        target_images = {}
+        target_masks = {}
+        
+
+        missing_classes = [c for c in current_targets if c not in classes]
+        found_classes = [c for c in current_targets if c in classes]
+        for t in found_classes:
+            target_images[t] = organelle_arrays[t]
+        
+        if len(missing_classes) > 0:
+            if not self.use_negative_class:
+                raise ValueError(
+                    f"Missing classes found, {str(missing_classes)}, please specify use_negative_class=True to generate the missing classes."
+                )
+            else:
+                if not organelle_arrays:
+                    raise ValueError(
+                        "No target classes found, please specify targets to generate the negative classes."
+                    )
+                # generate negative class
+                if len(organelle_arrays) > 1:
+                    found_gt_config = ConcatArrayConfig(
+                    name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt",
+                    channels=list(organelle_arrays.keys()),
+                    source_array_configs=organelle_arrays,
+                    )
+                    missing_mask_config = LogicalOrArrayConfig(
+                        name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_labelled_voxels",
+                        source_array_config=found_gt_config,
+                    )
+                else:
+                    missing_mask_config = list(organelle_arrays.values())[0]
+                missing_gt_config = ConstantArrayConfig(
+                    name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt",
+                    source_array_config=list(organelle_arrays.values())[0],
+                    constant=0,
+                )
+                for t in missing_classes:
+                    target_images[t] = missing_gt_config
+                    target_masks[t] = missing_mask_config
+            
+        for t in found_classes:
+            target_masks[t] = ConstantArrayConfig(
+                name=f"{dataset}_{t}_{self.output_resolution[0]}nm_labelled_voxels",
+                source_array_config=target_images[t],
+                constant=1,
+            )
+
+        
+
+
+        if len(target_images) > 1:
             gt_config = ConcatArrayConfig(
                 name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_gt",
                 channels=[organelle for organelle in current_targets],
-                source_array_configs={k: gt for k, gt in organelle_arrays.items()},
+                # source_array_configs={k: gt for k, gt in target_images.items()},
+                source_array_configs={k: target_images[k] for k in current_targets},
+            )
+            mask_config = ConcatArrayConfig(
+                name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_mask",
+                channels=[organelle for organelle in current_targets],
+                # source_array_configs={k: mask for k, mask in target_masks.items()},
+                # to be sure to have the same order
+                source_array_configs={k: target_masks[k] for k in current_targets},
             )
+        else:
+            gt_config = list(target_images.values())[0]
+            mask_config = list(target_masks.values())[0]
 
-        return raw_config, gt_config
+        return raw_config, gt_config, mask_config
 
     # @staticmethod
     # def generate_csv(datasets: List[DatasetSpec], csv_path: Path):

From 4973bf8ac86996836fecf1338376b9841bff91a7 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Thu, 13 Jun 2024 10:02:34 -0400
Subject: [PATCH 2/4] black format

---
 .../datasets/arrays/concat_array.py           | 24 +++++++++++++++----
 .../datasets/arrays/constant_array.py         | 15 ++++++++----
 .../datasets/arrays/constant_array_config.py  |  5 ++--
 .../datasets/arrays/logical_or_array.py       | 10 ++++----
 .../datasplits/datasets/arrays/ones_array.py  |  1 +
 .../datasplits/datasets/dataset.py            | 12 +++-------
 .../datasplits/datasplit_generator.py         | 22 ++++++++---------
 7 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
index dec570731..c2ef40969 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
@@ -473,7 +473,12 @@ def _can_neuroglance(self):
         Note:
             This method is used to return True if the source array can be visualized in neuroglance.
         """
-        return any([source_array._can_neuroglance() for source_array in self.source_arrays.values()])
+        return any(
+            [
+                source_array._can_neuroglance()
+                for source_array in self.source_arrays.values()
+            ]
+        )
 
     def _neuroglancer_source(self):
         """
@@ -489,7 +494,10 @@ def _neuroglancer_source(self):
             This method is used to return the source array for neuroglancer.
         """
         # return self._source_array._neuroglancer_source()
-        return [source_array._neuroglancer_source() for source_array in self.source_arrays.values()]
+        return [
+            source_array._neuroglancer_source()
+            for source_array in self.source_arrays.values()
+        ]
 
     def _neuroglancer_layer(self):
         """
@@ -505,7 +513,11 @@ def _neuroglancer_layer(self):
             This method is used to return the neuroglancer layer for the source array.
         """
         # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source())
-        return [source_array._neuroglancer_layer() for source_array in self.source_arrays.values() if source_array._can_neuroglance()]
+        return [
+            source_array._neuroglancer_layer()
+            for source_array in self.source_arrays.values()
+            if source_array._can_neuroglance()
+        ]
 
     def _source_name(self):
         """
@@ -521,4 +533,8 @@ def _source_name(self):
             This method is used to return the name of the source array.
         """
         # return self._source_array._source_name()
-        return [source_array._source_name() for source_array in self.source_arrays.values() if source_array._can_neuroglance()]
+        return [
+            source_array._source_name()
+            for source_array in self.source_arrays.values()
+            if source_array._can_neuroglance()
+        ]
diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py
index c5cb77eea..b76d5bd32 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/constant_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array.py
@@ -404,7 +404,10 @@ def __getitem__(self, roi: Roi) -> np.ndarray:
             specified by the region of interest. This method returns a subarray
             of the array with all values set to 1.
         """
-        return np.ones_like(self.source_array.__getitem__(roi), dtype=bool) * self._constant
+        return (
+            np.ones_like(self.source_array.__getitem__(roi), dtype=bool)
+            * self._constant
+        )
 
     def _can_neuroglance(self):
         """
@@ -436,7 +439,7 @@ def _neuroglancer_source(self):
         """
         # return self._source_array._neuroglancer_source()
         return np.ones_like(self.source_array.data, dtype=np.uint64) * self._constant
-    
+
     def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume:
         """
         Combines dimensions and metadata from self._source_array._neuroglancer_source()
@@ -447,7 +450,7 @@ def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume:
         """
         source_array_volume = self._source_array._neuroglancer_source()
         result_data = self._neuroglancer_source()
-        
+
         return neuroglancer.LocalVolume(
             data=result_data,
             dimensions=source_array_volume.dimensions,
@@ -468,7 +471,9 @@ def _neuroglancer_layer(self):
             This method is used to return the neuroglancer layer for the source array.
         """
         # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source())
-        return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source())
+        return neuroglancer.SegmentationLayer(
+            source=self._combined_neuroglancer_source()
+        )
 
     def _source_name(self):
         """
@@ -484,4 +489,4 @@ def _source_name(self):
             This method is used to return the name of the source array.
         """
         # return self._source_array._source_name()
-        return f"{self._constant}_of_{self.source_array._source_name()}"
\ No newline at end of file
+        return f"{self._constant}_of_{self.source_array._source_name()}"
diff --git a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py
index 9c8521d42..47c2b8689 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/constant_array_config.py
@@ -27,7 +27,6 @@ class ConstantArrayConfig(ArrayConfig):
         metadata={"help_text": "The Array that you want to copy and fill with ones."}
     )
 
-    constant : int = attr.ib(
-        metadata={"help_text": "The constant value to fill the array with."},
-        default=1
+    constant: int = attr.ib(
+        metadata={"help_text": "The constant value to fill the array with."}, default=1
     )
diff --git a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
index 8fe9da6cc..580f54d63 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/logical_or_array.py
@@ -617,10 +617,10 @@ def _combined_neuroglancer_source(self) -> neuroglancer.LocalVolume:
             neuroglancer.LocalVolume: The combined neuroglancer source.
         """
         source_array_volume = self._source_array._neuroglancer_source()
-        if isinstance(source_array_volume,list):
+        if isinstance(source_array_volume, list):
             source_array_volume = source_array_volume[0]
         result_data = self._neuroglancer_source()
-        
+
         return neuroglancer.LocalVolume(
             data=result_data,
             dimensions=source_array_volume.dimensions,
@@ -641,7 +641,9 @@ def _neuroglancer_layer(self):
             This method is used to return the neuroglancer layer for the source array.
         """
         # layer = neuroglancer.SegmentationLayer(source=self._neuroglancer_source())
-        return neuroglancer.SegmentationLayer(source=self._combined_neuroglancer_source())
+        return neuroglancer.SegmentationLayer(
+            source=self._combined_neuroglancer_source()
+        )
 
     def _source_name(self):
         """
@@ -683,4 +685,4 @@ def _source_name(self):
         name = self._source_array._source_name()
         if isinstance(name, list):
             name = "_".join(name)
-        return "logical_or"+name
+        return "logical_or" + name
diff --git a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
index 1e5889ff3..16e2d76ec 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/ones_array.py
@@ -8,6 +8,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class OnesArray(Array):
     """
     This is a wrapper around another `source_array` that simply provides ones
diff --git a/dacapo/experiments/datasplits/datasets/dataset.py b/dacapo/experiments/datasplits/datasets/dataset.py
index 5c70ea307..d3591b447 100644
--- a/dacapo/experiments/datasplits/datasets/dataset.py
+++ b/dacapo/experiments/datasplits/datasets/dataset.py
@@ -137,10 +137,7 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None):
             and self.raw._source_name() not in exclude_layers
         ):
             layers[self.raw._source_name()] = self.raw._neuroglancer_layer()
-        if (
-            self.gt is not None
-            and self.gt._can_neuroglance()
-        ):
+        if self.gt is not None and self.gt._can_neuroglance():
             new_layers = self.gt._neuroglancer_layer()
             if isinstance(new_layers, list):
                 names = self.gt._source_name()
@@ -149,10 +146,7 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None):
                         layers[name] = layer
             elif self.gt._source_name() not in exclude_layers:
                 layers[self.gt._source_name()] = new_layers
-        if (
-            self.mask is not None
-            and self.mask._can_neuroglance()
-        ):
+        if self.mask is not None and self.mask._can_neuroglance():
             new_layers = self.mask._neuroglancer_layer()
             if isinstance(new_layers, list):
                 names = self.mask._source_name()
@@ -160,5 +154,5 @@ def _neuroglancer_layers(self, prefix="", exclude_layers=None):
                     if name not in exclude_layers:
                         layers[name] = layer
-            elif self.gt._source_name() not in exclude_layers:
-                layers["mask_"+self.mask._source_name()] = new_layers
+            elif self.mask._source_name() not in exclude_layers:
+                layers["mask_" + self.mask._source_name()] = new_layers
         return layers
diff --git a/dacapo/experiments/datasplits/datasplit_generator.py b/dacapo/experiments/datasplits/datasplit_generator.py
index 7c5b0a331..74e067546 100644
--- a/dacapo/experiments/datasplits/datasplit_generator.py
+++ b/dacapo/experiments/datasplits/datasplit_generator.py
@@ -721,7 +721,9 @@ def __generate_semantic_seg_datasplit(self):
         train_dataset_configs = []
         validation_dataset_configs = []
         for dataset in self.datasets:
-            raw_config, gt_config, mask_config = self.__generate_semantic_seg_dataset_crop(dataset)
+            raw_config, gt_config, mask_config = (
+                self.__generate_semantic_seg_dataset_crop(dataset)
+            )
             if dataset.dataset_type == DatasetType.train:
                 train_dataset_configs.append(
                     RawGTDatasetConfig(
@@ -835,23 +837,22 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
             #     groupings=[(current_class_name, [])],
             # )
             organelle_arrays[current_class_name] = gt_config
-        
+
         if self.targets is None:
             targets_str = "_".join(classes)
             current_targets = classes
         else:
             current_targets = self.targets
             targets_str = "_".join(self.targets)
-        
+
         target_images = {}
         target_masks = {}
-        
 
         missing_classes = [c for c in current_targets if c not in classes]
         found_classes = [c for c in current_targets if c in classes]
         for t in found_classes:
             target_images[t] = organelle_arrays[t]
-        
+
         if len(missing_classes) > 0:
             if not self.use_negative_class:
                 raise ValueError(
@@ -865,9 +866,9 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
                 # generate negative class
                 if len(organelle_arrays) > 1:
                     found_gt_config = ConcatArrayConfig(
-                    name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt",
-                    channels=list(organelle_arrays.keys()),
-                    source_array_configs=organelle_arrays,
+                        name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_gt",
+                        channels=list(organelle_arrays.keys()),
+                        source_array_configs=organelle_arrays,
                     )
                     missing_mask_config = LogicalOrArrayConfig(
                         name=f"{dataset}_{current_class_name}_{self.output_resolution[0]}nm_labelled_voxels",
@@ -883,7 +884,7 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
                 for t in missing_classes:
                     target_images[t] = missing_gt_config
                     target_masks[t] = missing_mask_config
-            
+
         for t in found_classes:
             target_masks[t] = ConstantArrayConfig(
                 name=f"{dataset}_{t}_{self.output_resolution[0]}nm_labelled_voxels",
@@ -891,9 +892,6 @@ def __generate_semantic_seg_dataset_crop(self, dataset: DatasetSpec):
                 constant=1,
             )
 
-        
-
-
         if len(target_images) > 1:
             gt_config = ConcatArrayConfig(
                 name=f"{dataset}_{targets_str}_{self.output_resolution[0]}nm_gt",

From 73bddd8f10e7cf5c2445f9fd9b533eba9d71149a Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Mon, 17 Jun 2024 18:41:58 -0400
Subject: [PATCH 3/4] fix hot_distance

---
 dacapo/experiments/tasks/predictors/hot_distance_predictor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py
index c25df23ec..9b067f230 100644
--- a/dacapo/experiments/tasks/predictors/hot_distance_predictor.py
+++ b/dacapo/experiments/tasks/predictors/hot_distance_predictor.py
@@ -188,7 +188,7 @@ def create_weight(self, gt, target, mask, moving_class_counts=None):
                 self.dt_scale_factor,
             )
         else:
-            distance_mask = np.ones_like(target.data)
+            distance_mask = np.ones_like(gt.data)
 
         distance_weights, distance_moving_class_counts = balance_weights(
             gt[target.roi],

From 3b4a5df93ffce6d6801f82248c9ca8342b4cc400 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi <zouinkhi.marwan@gmail.com>
Date: Thu, 20 Jun 2024 10:49:47 -0400
Subject: [PATCH 4/4] fix error run name

---
 dacapo/experiments/run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/dacapo/experiments/run.py b/dacapo/experiments/run.py
index 55821a6c0..c7b8c8f7d 100644
--- a/dacapo/experiments/run.py
+++ b/dacapo/experiments/run.py
@@ -212,3 +212,6 @@ def move_optimizer(
                     state[k] = v.to(device)
         if empty_cuda_cache:
             torch.cuda.empty_cache()
+
+    def __str__(self):
+        return self.name