From e2e58da0a0561dd5b38944d40509e9d981c20570 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 18:48:04 -0600 Subject: [PATCH 01/18] Improve loss objective docs + batch_index --- captum/optim/_core/loss.py | 372 ++++++++++++++++++++++++------------- 1 file changed, 238 insertions(+), 134 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 66bb4c40c2..682e5b44eb 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -141,14 +141,18 @@ class BaseLoss(Loss): def __init__( self, target: Union[nn.Module, List[nn.Module]] = [], - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: super(BaseLoss, self).__init__() self._target = target if batch_index is None: self._batch_index = (None, None) + elif isinstance(batch_index, (list, tuple)): + self._batch_index = tuple(batch_index) else: self._batch_index = (batch_index, batch_index + 1) + assert all([isinstance(b, (int, type(None))) for b in self._batch_index]) + assert len(self._batch_index) == 2 @property def target(self) -> Union[nn.Module, List[nn.Module]]: @@ -197,10 +201,14 @@ class LayerActivation(BaseLoss): their original form. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -215,18 +223,26 @@ class ChannelActivation(BaseLoss): Maximize activations at the target layer and target channel. This loss maximizes the activations of a target channel in a specified target layer, and can be useful to determine what features the channel is excited by. - - Args: - target (nn.Module): The layer to containing the channel to optimize for. - channel_index (int): The index of the channel to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( - self, target: nn.Module, channel_index: int, batch_index: Optional[int] = None + self, + target: nn.Module, + channel_index: int, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + channel_index (int): The index of the channel to optimize for. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -250,19 +266,6 @@ class NeuronActivation(BaseLoss): from the specified layer. This loss is useful for determining the type of features that excite a neuron, and thus is often used for circuits and neuron related research. 
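For example, a minimal usage sketch (the ``opt`` alias for ``captum.optim`` and the
``model.mixed4a`` layer handle below are illustrative assumptions, not part of this
module):

.. code-block:: python

    # Target the center neuron of channel 13 in a placeholder layer.
    loss_fn = opt.loss.NeuronActivation(model.mixed4a, channel_index=13)

    # The new batch_index support also accepts an assumed [start, end]
    # range, here restricting the objective to batch items 0 and 1.
    loss_fn = opt.loss.NeuronActivation(
        model.mixed4a, channel_index=13, batch_index=[0, 2]
    )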
- - Args: - target (nn.Module): The layer to containing the channel to optimize for. - channel_index (int): The index of the channel to optimize for. - x (int, optional): The x coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit left of center for even - lengths. - y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -271,8 +274,27 @@ def __init__( channel_index: int, x: Optional[int] = None, y: Optional[int] = None, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): The layer instance containing the channel to optimize for. + channel_index (int): The index of the channel to optimize for. + x (int, optional): The x coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit up of center for even + heights. + Default: None + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index self.x = x @@ -305,10 +327,14 @@ class DeepDream(BaseLoss): referred to as 'Deep Dream'. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -328,10 +354,14 @@ class TotalVariation(BaseLoss): often used to remove unwanted visual artifacts. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -346,22 +376,26 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: class L1(BaseLoss): """ L1 norm of the target layer, generally used as a penalty. - - Args: - target (nn.Module): The layer to optimize for. 
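A hedged sketch of typical penalty usage (the layer handle and the 0.05 weight are
illustrative placeholders; scalar multiplication and subtraction are assumed to
compose Loss instances via the math operators defined on them):

.. code-block:: python

    # Maximize channel 7 while lightly penalizing the overall activation
    # magnitude of the same layer.
    main_loss = opt.loss.ChannelActivation(model.mixed4a, 7)
    loss_fn = main_loss - 0.05 * opt.loss.L1(model.mixed4a)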
- constant (float): Constant threshold to deduct from the activations. - Defaults to 0. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( self, target: nn.Module, constant: float = 0.0, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + constant (float): Constant threshold to deduct from the activations. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -375,34 +409,40 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: class L2(BaseLoss): """ L2 norm of the target layer, generally used as a penalty. - - Args: - target (nn.Module): The layer to optimize for. - constant (float): Constant threshold to deduct from the activations. - Defaults to 0. - epsilon (float): Small value to add to L2 prior to sqrt. Defaults to 1e-6. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( self, target: nn.Module, constant: float = 0.0, - epsilon: float = 1e-6, - batch_index: Optional[int] = None, + eps: float = 1e-6, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + constant (float): Constant threshold to deduct from the activations. + Default: 0.0 + eps (float): Small value to add to L2 prior to sqrt. + Default: 1e-6 + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.constant = constant - self.epsilon = epsilon + self.eps = eps def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target][ self.batch_index[0] : self.batch_index[1] ] activations = ((activations - self.constant) ** 2).sum() - return torch.sqrt(self.epsilon + activations) + return torch.sqrt(self.eps + activations) @loss_wrapper @@ -416,13 +456,18 @@ class Diversity(BaseLoss): loss. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): Unused here since we are optimizing for diversity - across the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. index + ranges should be in the format of: [start, end]. 
+ Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] batch, channels = activations.shape[:2] flattened = activations.view(batch, channels, -1) grams = torch.matmul(flattened, torch.transpose(flattened, 1, 2)) @@ -446,23 +491,29 @@ class ActivationInterpolation(BaseLoss): https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons This loss helps to interpolate or mix visualizations from two activations (layer or channel) by interpolating a linear sum between the two activations. - - Args: - target1 (nn.Module): The first layer to optimize for. - channel_index1 (int): Index of channel in first layer to optimize. Defaults to - all channels. - target2 (nn.Module): The first layer to optimize for. - channel_index2 (int): Index of channel in first layer to optimize. Defaults to - all channels. """ def __init__( self, target1: nn.Module = None, - channel_index1: int = -1, + channel_index1: Optional[int] = None, target2: nn.Module = None, - channel_index2: int = -1, + channel_index2: Optional[int] = None, ) -> None: + """ + Args: + + target1 (nn.Module): The first layer, transform, or image parameterization + instance to optimize the output for. + channel_index1 (int, optional): Index of channel in first target to + optimize. Default is set to None for all channels. + Default: None + target2 (nn.Module): The second layer, transform, or image parameterization + instance to optimize the output for. + channel_index2 (int, optional): Index of channel in second target to + optimize. Default is set to None for all channels. + Default: None + """ self.target_one = target1 self.channel_index_one = channel_index1 self.target_two = target2 self.channel_index_two = channel_index2 @@ -476,15 +527,16 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: assert activations_one is not None and activations_two is not None # ensure channel indices are valid - assert ( - self.channel_index_one < activations_one.shape[1] - and self.channel_index_two < activations_two.shape[1] - ) + if self.channel_index_one is not None: + assert self.channel_index_one < activations_one.shape[1] + if self.channel_index_two is not None: + assert self.channel_index_two < activations_two.shape[1] + assert activations_one.size(0) == activations_two.size(0) - if self.channel_index_one > -1: + if self.channel_index_one is not None: activations_one = activations_one[:, self.channel_index_one] - if self.channel_index_two > -1: + if self.channel_index_two is not None: activations_two = activations_two[:, self.channel_index_two] B = activations_one.size(0) @@ -508,19 +560,35 @@ class Alignment(BaseLoss): When interpolating between activations, it may be desirable to keep image landmarks in the same position for visual comparison. This loss helps to minimize L2 distance between neighbouring images. - - Args: - target (nn.Module): The layer to optimize for. - decay_ratio (float): How much to decay penalty as images move apart in batch. - Defaults to 2. """ - def __init__(self, target: nn.Module, decay_ratio: float = 2.0) -> None: - BaseLoss.__init__(self, target) + def __init__( + self, + target: nn.Module, + decay_ratio: float = 2.0, + batch_index: Optional[List[int]] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + decay_ratio (float): How much to decay penalty as images move apart in + the batch.
+ Default: 2.0 + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. + index ranges should be in the format of: [start, end]. + Default: None + """ + if batch_index: + assert len(batch_index) == 2 + BaseLoss.__init__(self, target, batch_index) self.decay_ratio = decay_ratio def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] B = activations.size(0) sum_tensor = torch.zeros(1, device=activations.device) @@ -545,14 +613,6 @@ class Direction(BaseLoss): the alignment between the input vector and the layer’s activation vector. The dimensionality of the vector should correspond to the number of channels in the layer. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -562,6 +622,19 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) self.cossim_pow = cossim_pow @@ -581,21 +654,6 @@ class NeuronDirection(BaseLoss): https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by focusing on visualizing a single neuron within the kernel. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - x (int, optional): The x coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit left of center for even - lengths. - y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - channel_index (int): The index of the channel to optimize for. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -608,6 +666,30 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. + x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. 
+ Default: None + channel_index (int): The index of the channel to optimize for. If set to + None, then all channels will be used. + Default: None + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) self.x = x @@ -673,16 +755,25 @@ def __init__( ) -> None: """ Args: - target (nn.Module): A target layer instance. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. vec (torch.Tensor): A neuron direction vector to use. vec_whitened (torch.Tensor, optional): A whitened neuron direction vector. + If set to None, then no whitened vec will be used. + Default: None cossim_pow (float, optional): The desired cosine similarity power to use. - x (int, optional): Optionally provide a specific x position for the target - neuron. - y (int, optional): Optionally provide a specific y position for the target - neuron. + x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. + Default: None eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. + Default: 1.0e-4 """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -726,14 +817,6 @@ class TensorDirection(BaseLoss): Carter, et al., "Activation Atlas", Distill, 2019. https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by allowing batch-wise direction visualization. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -743,6 +826,19 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 4 self.vec = vec @@ -774,21 +870,6 @@ class ActivationWeights(BaseLoss): Apply weights to channels, neurons, or spots in the target. This loss weighs specific channels or neurons in a given layer, via a weight vector. - - Args: - target (nn.Module): The layer to optimize for. - weights (torch.Tensor): Weights to apply to targets. - neuron (bool): Whether target is a neuron. Defaults to False. - x (int, optional): The x coordinate of the neuron to optimize for. 
If - unspecified, defaults to center, or one unit left of center for even - lengths. - y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - wx (int, optional): Length of neurons to apply the weights to, along the - x-axis. - wy (int, optional): Length of neurons to apply the weights to, along the - y-axis. """ def __init__( @@ -801,6 +882,29 @@ def __init__( wx: Optional[int] = None, wy: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + weights (torch.Tensor): Weights to apply to targets. + neuron (bool): Whether target is a neuron. + Default: False + x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. + Default: None + wx (int, optional): Length of neurons to apply the weights to, along the + x-axis. Set to None for the full length. + Default: None + wy (int, optional): Length of neurons to apply the weights to, along the + y-axis. Set to None for the full length. + Default: None + """ BaseLoss.__init__(self, target) self.x = x self.y = y From c905352ed283524c6c15e24f3218d043716b9e75 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 18:58:52 -0600 Subject: [PATCH 02/18] Fix NeuronActivation docs --- captum/optim/_core/loss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 682e5b44eb..194422f3f6 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -279,7 +279,8 @@ def __init__( """ Args: - target (nn.Module): The layer instance containing the channel to optimize for. + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. channel_index (int): The index of the channel to optimize for. x (int, optional): The x coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit left of center for even From 00927a13ee8c3aa267a95a9f3dc1662c17e47d6f Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 2 Jun 2022 14:10:06 -0600 Subject: [PATCH 03/18] Improve loss docs * Add missing docs. * Fix errors in existing docs. --- captum/optim/_core/loss.py | 153 ++++++++++++++++++++++++++++++++----- 1 file changed, 134 insertions(+), 19 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 194422f3f6..57b63ebc1c 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -10,6 +10,14 @@ def _make_arg_str(arg: Any) -> str: + """ + Args: + + arg (Any): An argument to convert to a string. + + Returns: + arg (str): The arg in str form. + """ arg = str(arg) too_big = len(arg) > 15 or "\n" in arg return arg[:15] + "..." if too_big else arg @@ -23,7 +31,7 @@ class Loss(ABC): """ def __init__(self) -> None: - super(Loss, self).__init__() + super().__init__() @abstractproperty def target(self) -> Union[nn.Module, List[nn.Module]]: @@ -105,10 +113,35 @@ def module_op( ) -> "CompositeLoss": """ This is a general function for applying math operations to Losses + + Args: + + self (Loss): A Loss objective instance.
+ other (int, float, Loss, or None): The Loss objective instance or number to + use on the self Loss objective as part of a math operation. If math_op + is a unary operation, then other should be set to None. + math_op (Callable): A math operator to use on the Loss instance. + + Returns: + loss (CompositeLoss): A CompositeLoss instance with the math operations + created by the specified arguments. """ if other is None and math_op == operator.neg: def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through loss objective, and then apply a unary + math op. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objective, and the unary math_op. + """ return math_op(self(module)) name = self.__name__ @@ -116,6 +149,19 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: elif isinstance(other, (int, float)): def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss objective and then apply the + math operations with numbers. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objective, and then the math_op with a number. + """ return math_op(self(module), other) name = self.__name__ @@ -123,6 +169,19 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: elif isinstance(other, Loss): # We take the mean of the output tensor to resolve shape mismatches def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss objectives and then combine the + outputs with a math operation. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objectives, and then merged with the math_op. + """ return math_op(torch.mean(self(module)), torch.mean(other(module))) name = f"Compose({', '.join([self.__name__, other.__name__])})" @@ -143,7 +202,18 @@ def __init__( target: Union[nn.Module, List[nn.Module]] = [], batch_index: Optional[Union[int, List[int]]] = None, ) -> None: - super(BaseLoss, self).__init__() + """ + Args: + + target (nn.Module or list of nn.module): A target nn.Module or list of + nn.Module. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. Index ranges should be + in the format of: [start, end]. + Default: None + """ + super().__init__() self._target = target if batch_index is None: self._batch_index = (None, None) @@ -156,10 +226,20 @@ def __init__( @property def target(self) -> Union[nn.Module, List[nn.Module]]: + """ + Returns: + target (nn.Module or list of nn.Module): A target nn.Module or list of + nn.Module. + """ return self._target @property def batch_index(self) -> Tuple: + """ + Returns: + batch_index (tuple of int): A tuple of batch indices with a format + of: (start, end). 
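For example, a sketch of the normalization performed in ``BaseLoss.__init__``
that produces this tuple:

.. code-block:: python

    # Mapping applied to the batch_index argument:
    #   None    -> (None, None)   # slice covers the whole batch
    #   1       -> (1, 2)         # a single batch item
    #   [0, 3]  -> (0, 3)         # an assumed [start, end] range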
+ """ return self._batch_index @@ -170,11 +250,35 @@ def __init__( name: str = "", target: Union[nn.Module, List[nn.Module]] = [], ) -> None: - super(CompositeLoss, self).__init__(target) + """ + Args: + + loss_fn (Callable): A function that takes a dict of captured activations + with nn.Modules as keys, and then passes those activations through loss + objective(s) & math operations. + name (str, optional): The name of all composable operations in the + instance. + Default: "" + target (nn.Module or list of nn.module): A target nn.Module or list of + nn.Module. + """ + super().__init__(target) self.__name__ = name self.loss_fn = loss_fn def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss function. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run through the + loss function. + """ return self.loss_fn(targets_to_values) @@ -206,7 +310,7 @@ class LayerActivation(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -239,7 +343,7 @@ def __init__( channel_index (int): The index of the channel to optimize for. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -292,7 +396,7 @@ def __init__( Default: None batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -333,7 +437,7 @@ class DeepDream(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -360,7 +464,7 @@ class TotalVariation(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -393,7 +497,7 @@ def __init__( constant (float): Constant threshold to deduct from the activations. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. 
Index ranges should be in the format of: [start, end]. Default: None """ @@ -430,7 +534,7 @@ def __init__( Default: 1e-6 batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -461,7 +565,7 @@ class Diversity(BaseLoss): target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. index + optimize. If set to None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -579,7 +683,7 @@ def __init__( Default: 2.0 batch_index (list of int, optional): The index range of activations to optimize. If set to None, defaults to all activations in the batch. - index ranges should be in the format of: [start, end]. + Index ranges should be in the format of: [start, end]. Default: None """ if batch_index: @@ -730,7 +834,7 @@ class AngledNeuronDirection(BaseLoss): More information on the algorithm this objective uses can be found here: https://github.com/tensorflow/lucid/issues/116 - This Lucid equivalents of this loss function can be found here: + This Lucid equivalents of this loss objective can be found here: https://github.com/tensorflow/lucid/blob/master/notebooks/ activation-atlas/activation-atlas-simple.ipynb https://github.com/tensorflow/lucid/blob/master/notebooks/ @@ -775,6 +879,10 @@ def __init__( eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. Default: 1.0e-4 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -948,22 +1056,22 @@ def sum_loss_list( ) -> CompositeLoss: """ Summarize a large number of losses without recursion errors. By default using 300+ - loss functions for a single optimization task will result in exceeding Python's + loss objectives for a single optimization task will result in exceeding Python's default maximum recursion depth limit. This function can be used to avoid the - recursion depth limit for tasks such as summarizing a large list of loss functions + recursion depth limit for tasks such as summarizing a large list of loss objectives with the built-in sum() function. This function works similar to Lucid's optvis.objectives.Objective.sum() function. Args: - loss_list (list): A list of loss function objectives. - to_scalar_fn (Callable): A function for converting loss function outputs to + loss_list (list): A list of loss objectives. + to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. Default: torch.mean Returns: - loss_fn (CompositeLoss): A composite loss function containing all the loss + loss_fn (CompositeLoss): A CompositeLoss instance containing all the loss functions from `loss_list`. 
""" @@ -985,11 +1093,18 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: """ - Helper function to summarize tensor outputs from loss functions. + Helper function to summarize tensor outputs from loss objectives. default_loss_summarize applies `mean` to the loss tensor and negates it so that optimizing it maximizes the activations we are interested in. + + Args: + + loss_value (torch.Tensor): A tensor containing the loss values. + + Returns: + loss_value (torch.Tensor): The loss_value's mean multiplied by -1. """ return -1 * loss_value.mean() From 857f26c07eab76344543624b5ce20d2b85ec4ee1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 20 Jun 2022 14:54:10 -0600 Subject: [PATCH 04/18] Add CompositeLoss to __all__ --- captum/optim/_core/loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 57b63ebc1c..fa0808a981 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1113,6 +1113,7 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: "Loss", "loss_wrapper", "BaseLoss", + "CompositeLoss", "LayerActivation", "ChannelActivation", "NeuronActivation", From 5e3a80fa76a5d91f015776bcb60f9615494ef946 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 24 Jun 2022 13:30:27 -0600 Subject: [PATCH 05/18] Add docs for loss_fn in sum_loss_list --- captum/optim/_core/loss.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index fa0808a981..f4cb3a6d89 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1076,6 +1076,20 @@ def sum_loss_list( """ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the list of loss objectives based on + specified targets, and then apply a reduction op to reduce them to scalar + before adding them together. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run through the + loss objectives, and then added together. + """ return sum([to_scalar_fn(loss(module)) for loss in loss_list]) name = "Sum(" + ", ".join([loss.__name__ for loss in loss_list]) + ")" From 5f849aa4d1dd9ba9dd6cc367be0a70b8e759e8f4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 26 Jun 2022 11:57:56 -0600 Subject: [PATCH 06/18] Fix Sphinx loss doc duplication bug --- captum/optim/_core/loss.py | 102 ++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index f4cb3a6d89..4657b23f8c 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -303,17 +303,25 @@ class LayerActivation(BaseLoss): Maximize activations at the target layer. This is the most basic loss available and it simply returns the activations in their original form. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. 
- Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -430,17 +438,25 @@ class DeepDream(BaseLoss): This loss returns the squared layer activations. When combined with a negative mean loss summarization, this loss will create hallucinogenic visuals commonly referred to as 'Deep Dream'. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -457,17 +473,25 @@ class TotalVariation(BaseLoss): This loss attempts to smooth / denoise the target by performing total variance denoising. The target is most often the image that’s being optimized. This loss is often used to remove unwanted visual artifacts. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -559,16 +583,24 @@ class Diversity(BaseLoss): This loss helps break up polysemantic layers, channels, and neurons by encouraging diversity across the different batches. This loss is to be used along with a main loss. 
+ """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[List[int]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. Index - ranges should be in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. + Index ranges should be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] From 5837745fae82c98a4267b7124cea535825821179 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 17:56:19 -0600 Subject: [PATCH 07/18] Improve loss docs for Sphinx --- captum/optim/_core/loss.py | 240 ++++++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 80 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 4657b23f8c..8a4a1a65b9 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -197,6 +197,10 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: class BaseLoss(Loss): + """ + The base class used for all Loss objectives. + """ + def __init__( self, target: Union[nn.Module, List[nn.Module]] = [], @@ -209,9 +213,9 @@ def __init__( nn.Module. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ super().__init__() self._target = target @@ -244,6 +248,82 @@ def batch_index(self) -> Tuple: class CompositeLoss(BaseLoss): + """ + When math operations are performed using one or more loss objectives, this class + is used to store and run those operations. Below we show examples of common + CompositeLoss use cases. + + + Using CompositeLoss with a unary op or with a binary op involving a Loss instance + and a float or integer: + + .. code-block:: python + + def compose_single_loss(loss: opt.loss.Loss) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + return loss(module) + + # Name of new composable loss instance + name = loss.__name__ + # All targets being used in the composable loss instance + target = loss.target + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + + Using CompositeLoss with a binary op using two Loss instances: + + .. 
code-block:: python + + def compose_binary_loss( + loss1: opt.loss.Loss, loss2: opt.loss.Loss + ) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + # Operation using 2 loss instances + return loss1(module) + loss2(module) + + # Name of new composable loss instance + name = "Compose(" + ", ".join([loss1.__name__, loss2.__name__]) + ")" + + # All targets being used in the composable loss instance + target1 = loss1.target if type(loss1.target) is list else [loss1.target] + target2 = loss2.target if type(loss2.target) is list else [loss2.target] + target = target1 + target2 + + # Remove duplicate targets + target = list(dict.fromkeys(target)) + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + + Using CompositeLoss with a list of Loss instances: + + .. code-block:: python + + def compose_multiple_loss(loss: List[opt.loss.Loss]) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + loss_tensors = [loss_obj(module) for loss_obj in loss] + # We can use any operation that combines the list of tensors into a + # single tensor + return sum(loss_tensors) + + # Name of new composable loss instance + name = "Compose(" + ", ".join([obj.__name__ for obj in loss]) + ")" + + # All targets being used in the composable loss instance + # targets will either be List[nn.Module] or nn.Module + targets = [loss_obj.target for loss_obj in loss] + # Flatten list of targets + target = [ + o for l in [t if type(t) is list else [t] for t in targets] for o in l + ] + # Remove duplicate targets + target = list(dict.fromkeys(target)) + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + """ + def __init__( self, loss_fn: Callable, @@ -258,7 +338,7 @@ def __init__( objective(s) & math operations. name (str, optional): The name of all composable operations in the instance. - Default: "" + Default: ``""`` target (nn.Module or list of nn.module): A target nn.Module or list of nn.Module. """ @@ -317,9 +397,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -351,9 +431,9 @@ def __init__( channel_index (int): The index of the channel to optimize for. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -397,16 +477,16 @@ def __init__( x (int, optional): The x coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit left of center for even lengths. - Default: None + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit up of center for even heights. 
- Default: None + Default: ``None`` batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -452,9 +532,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -487,9 +567,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -521,9 +601,9 @@ def __init__( constant (float): Constant threshold to deduct from the activations. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -553,14 +633,14 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. - Default: 0.0 + Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. - Default: 1e-6 + Default: ``1e-6`` batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -596,9 +676,9 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. + optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. 
- Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -643,13 +723,13 @@ def __init__( target1 (nn.Module): The first layer, transform, or image parameterization instance to optimize the output for. channel_index1 (int, optional): Index of channel in first target to - optimize. Default is set to None for all channels. - Default: None + optimize. Default is set to ``None`` for all channels. + Default: ``None`` target2 (nn.Module): The second layer, transform, or image parameterization instance to optimize the output for. channel_index2 (int, optional): Index of channel in second target to - optimize. Default is set to None for all channels. - Default: None + optimize. Default is set to ``None`` for all channels. + Default: ``None`` """ self.target_one = target1 self.channel_index_one = channel_index1 @@ -712,11 +792,11 @@ def __init__( instance to optimize the output of. decay_ratio (float): How much to decay penalty as images move apart in the batch. - Default: 2.0 + Default: ``2.0`` batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. + optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. - Default: None + Default: ``None`` """ if batch_index: assert len(batch_index) == 2 @@ -766,11 +846,11 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all - activations in the batch. - Default: None + optimizing a batch of activations. If set to ``None``, defaults to + all activations in the batch. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) @@ -810,22 +890,22 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` channel_index (int): The index of the channel to optimize for. If set to - None, then all channels will be used. - Default: None + ``None``, then all channels will be used. + Default: ``None`` cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) @@ -897,24 +977,24 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): A neuron direction vector to use. 
vec_whitened (torch.Tensor, optional): A whitened neuron direction vector. - If set to None, then no whitened vec will be used. - Default: None + If set to ``None``, then no whitened vec will be used. + Default: ``None`` cossim_pow (float, optional): The desired cosine similarity power to use. x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. - Default: 1.0e-4 + Default: ``1.0e-4`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -974,11 +1054,11 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 4 @@ -1030,21 +1110,21 @@ def __init__( instance to optimize the output of. weights (torch.Tensor): Weights to apply to targets. neuron (bool): Whether target is a neuron. - Default: False + Default: ``False`` x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` wx (int, optional): Length of neurons to apply the weights to, along the - x-axis. Set to None for the full length. - Default: None + x-axis. Set to ``None`` for the full length. + Default: ``None`` wy (int, optional): Length of neurons to apply the weights to, along the - y-axis. Set to None for the full length. - Default: None + y-axis. Set to ``None`` for the full length. + Default: ``None`` """ BaseLoss.__init__(self, target) self.x = x @@ -1100,11 +1180,11 @@ def sum_loss_list( loss_list (list): A list of loss objectives. to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. - Default: torch.mean + Default: ``torch.mean`` Returns: loss_fn (CompositeLoss): A CompositeLoss instance containing all the loss - functions from `loss_list`. 
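Example (a sketch: the layer handle is a placeholder and ``opt`` is assumed
to alias ``captum.optim``):

.. code-block:: python

    # Sum several hundred channel objectives without triggering
    # Python's default recursion depth limit.
    loss_list = [
        opt.loss.ChannelActivation(model.mixed4a, i) for i in range(512)
    ]
    loss_fn = opt.loss.sum_loss_list(loss_list, to_scalar_fn=torch.mean)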
+            functions from ``loss_list``.
     """
 
     def loss_fn(module: ModuleOutputMapping) -> torch.Tensor:

From 407f76903ea0ca1ced8161e7359d1a18c11ecedf Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Fri, 15 Jul 2022 10:46:17 -0600
Subject: [PATCH 08/18] Improve DeepDream docs

---
 captum/optim/_core/loss.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index 8a4a1a65b9..33c840e037 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -515,9 +515,15 @@ class DeepDream(BaseLoss):
     Maximize 'interestingness' at the target layer.
     Mordvintsev et al., 2015.
     https://github.com/google/deepdream
+
     This loss returns the squared layer activations. When combined with a negative
     mean loss summarization, this loss will create hallucinogenic visuals commonly
     referred to as 'Deep Dream'.
+
+    DeepDream tries to increase the values of neurons proportional to the amount
+    they are presently active. This is equivalent to maximizing the sum of the
+    squares. If you remove the square, you'd be doing a direciton visualization
+    of: ``[1,1,1,....]``.
     """
 
     def __init__(

From 6f10b76c5e15639969c339f8dcd2f348595cd6e3 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Fri, 15 Jul 2022 11:50:33 -0600
Subject: [PATCH 09/18] Improve doc grammar

---
 captum/optim/_core/loss.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index 33c840e037..5f9e6cf62c 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -522,8 +522,8 @@ class DeepDream(BaseLoss):
 
     DeepDream tries to increase the values of neurons proportional to the amount
     they are presently active. This is equivalent to maximizing the sum of the
-    squares. If you remove the square, you'd be doing a direciton visualization
-    of: ``[1,1,1,....]``.
+    squares. If you remove the square, you'd be visualizing a direction of:
+    ``[1,1,1,....]`` (which is the same as :class:`.LayerActivation`).
     """
 
     def __init__(

From eb5a961481d28720bc84f39b206983bd1d385895 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Fri, 15 Jul 2022 14:12:38 -0600
Subject: [PATCH 10/18] Fix nn.Module type hints

---
 captum/optim/_core/loss.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index 5f9e6cf62c..1c4eabc285 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -209,7 +209,7 @@ def __init__(
         """
         Args:
 
-            target (nn.Module or list of nn.module): A target nn.Module or list of
+            target (nn.Module or list of nn.Module): A target nn.Module or list of
                 nn.Module.
             batch_index (int or list of int, optional): The index or index range of
                 activations to optimize if optimizing a batch of activations. If set to
@@ -339,7 +339,7 @@ def __init__(
             name (str, optional): The name of all composable operations in the
                 instance.
                 Default: ``""``
-            target (nn.Module or list of nn.module): A target nn.Module or list of
+            target (nn.Module or list of nn.Module): A target nn.Module or list of
                 nn.Module.
         """
         super().__init__(target)
@@ -1227,7 +1227,7 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor:
     """
     Helper function to summarize tensor outputs from loss objectives.
 
-    default_loss_summarize applies `mean` to the loss tensor
+    default_loss_summarize applies :func:`torch.mean` to the loss tensor
     and negates it so that optimizing it maximizes the activations we are
     interested in.
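
Note: a minimal sketch of the DeepDream claim above, in plain PyTorch (the
example tensor is illustrative and not part of these patches). Squaring makes
each neuron's gradient proportional to how active that neuron already is,
while the plain sum pushes every neuron equally hard:

    import torch

    acts = torch.tensor([0.5, 2.0, -1.0], requires_grad=True)

    # Squared objective (DeepDream): the gradient is 2 * acts, i.e.
    # proportional to the current activations.
    (acts ** 2).sum().backward()
    print(acts.grad)  # tensor([ 1., 4., -2.])

    # Plain sum (LayerActivation): the gradient is the constant
    # [1, 1, 1, ...] direction mentioned in the docstring above.
    acts.grad = None
    acts.sum().backward()
    print(acts.grad)  # tensor([1., 1., 1.])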
From f7812657a06810f838e4b2aa4ee55bfa942dfe09 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Sat, 16 Jul 2022 10:45:12 -0600
Subject: [PATCH 11/18] Fix loss doc type formatting

---
 captum/optim/_core/loss.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index 1c4eabc285..b3a100b673 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -209,9 +209,9 @@ def __init__(
         """
         Args:
 
-            target (nn.Module or list of nn.Module): A target nn.Module or list of
+            target (nn.Module or List[nn.Module]): A target nn.Module or list of
                 nn.Module.
-            batch_index (int or list of int, optional): The index or index range of
+            batch_index (int or List[int], optional): The index or index range of
                 activations to optimize if optimizing a batch of activations. If set to
                 ``None``, defaults to all activations in the batch. Index ranges should
                 be in the format of: [start, end].
@@ -232,7 +232,7 @@ def __init__(
     def target(self) -> Union[nn.Module, List[nn.Module]]:
         """
         Returns:
-            target (nn.Module or list of nn.Module): A target nn.Module or list of
+            target (nn.Module or List[nn.Module]): A target nn.Module or list of
                 nn.Module.
         """
         return self._target
@@ -241,7 +241,7 @@ def target(self) -> Union[nn.Module, List[nn.Module]]:
     def batch_index(self) -> Tuple:
         """
         Returns:
-            batch_index (tuple of int): A tuple of batch indices with a format
+            batch_index (Tuple[int]): A tuple of batch indices with a format
                 of: (start, end).
         """
         return self._batch_index
@@ -339,7 +339,7 @@ def __init__(
             name (str, optional): The name of all composable operations in the
                 instance.
                 Default: ``""``
-            target (nn.Module or list of nn.Module): A target nn.Module or list of
+            target (nn.Module or List[nn.Module]): A target nn.Module or list of
                 nn.Module.
@@ -395,7 +395,7 @@ def __init__(
 
             target (nn.Module): A target layer, transform, or image parameterization
                 instance to optimize the output of.
- batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -571,7 +571,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -605,7 +605,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -642,7 +642,7 @@ def __init__( Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. Default: ``1e-6`` - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -681,11 +681,13 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (list of int, optional): The index range of activations to + batch_index (List[int], optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` """ + if batch_index: + assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -799,7 +801,7 @@ def __init__( decay_ratio (float): How much to decay penalty as images move apart in the batch. Default: ``2.0`` - batch_index (list of int, optional): The index range of activations to + batch_index (List[int], optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. 
Default: ``None`` From 42b18ca47f708cc553b8c047e9ebf180c7103ca5 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 17 Jul 2022 08:54:40 -0600 Subject: [PATCH 12/18] Add more assert checks --- captum/optim/_core/loss.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index b3a100b673..1d6f26c5d0 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -687,6 +687,7 @@ def __init__( Default: ``None`` """ if batch_index: + assert isinstance(batch_index, (list, tuple)) assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) @@ -807,6 +808,7 @@ def __init__( Default: ``None`` """ if batch_index: + assert isinstance(batch_index, (list, tuple)) assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) self.decay_ratio = decay_ratio From a9eabfd446f4e1bdeb29e5e93aecc24fbe1fcc1d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:18:33 -0600 Subject: [PATCH 13/18] Fix loss docstring type hint formatting --- captum/optim/_core/loss.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1d6f26c5d0..5c534613f2 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -209,9 +209,9 @@ def __init__( """ Args: - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -232,7 +232,7 @@ def __init__( def target(self) -> Union[nn.Module, List[nn.Module]]: """ Returns: - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. """ return self._target @@ -241,7 +241,7 @@ def target(self) -> Union[nn.Module, List[nn.Module]]: def batch_index(self) -> Tuple: """ Returns: - batch_index (Tuple[int]): A tuple of batch indices with a format + batch_index (tuple of int): A tuple of batch indices with a format of: (start, end). """ return self._batch_index @@ -333,13 +333,13 @@ def __init__( """ Args: - loss_fn (Callable): A function that takes a dict of captured activations + loss_fn (callable): A function that takes a dict of captured activations with nn.Modules as keys, and then passes those activations through loss objective(s) & math operations. name (str, optional): The name of all composable operations in the instance. Default: ``""`` - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. """ super().__init__(target) @@ -395,7 +395,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. 
@@ -429,7 +429,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. channel_index (int): The index of the channel to optimize for. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -482,7 +482,7 @@ def __init__( unspecified, defaults to center, or one unit up of center for even heights. Default: ``None`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -536,7 +536,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -571,7 +571,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -605,7 +605,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -642,7 +642,7 @@ def __init__( Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. Default: ``1e-6`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -681,7 +681,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (List[int], optional): The index range of activations to + batch_index (list of int, optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` @@ -802,7 +802,7 @@ def __init__( decay_ratio (float): How much to decay penalty as images move apart in the batch. 
Default: ``2.0``
-            batch_index (List[int], optional): The index range of activations to
+            batch_index (list of int, optional): The index range of activations to
                 optimize. If set to ``None``, defaults to all activations in the batch.
                 Index ranges should be in the format of: [start, end].
                 Default: ``None``
@@ -1188,9 +1188,10 @@ def sum_loss_list(
     Args:
 
         loss_list (list): A list of loss objectives.
-        to_scalar_fn (Callable): A function for converting loss objective outputs to
-            scalar values, in order to prevent size mismatches.
-            Default: ``torch.mean``
+        to_scalar_fn (callable): A function for converting loss objective outputs to
+            scalar values, in order to prevent size mismatches. Set to
+            :class:`torch.nn.Identity` for no reduction op.
+            Default: :func:`torch.mean`

From a7fb6d941ce3edb6a5bccb3417c2a413185e8fa5 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Thu, 21 Jul 2022 12:45:50 -0600
Subject: [PATCH 14/18] Max line length doesn't apply to URLs

---
 captum/optim/_core/loss.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index 5c534613f2..c48539151a 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -957,10 +957,8 @@ class AngledNeuronDirection(BaseLoss):
     https://github.com/tensorflow/lucid/issues/116
 
     The Lucid equivalents of this loss objective can be found here:
-    https://github.com/tensorflow/lucid/blob/master/notebooks/
-    activation-atlas/activation-atlas-simple.ipynb
-    https://github.com/tensorflow/lucid/blob/master/notebooks/
-    activation-atlas/class-activation-atlas.ipynb
+    https://github.com/tensorflow/lucid/blob/master/notebooks/activation-atlas/activation-atlas-simple.ipynb
+    https://github.com/tensorflow/lucid/blob/master/notebooks/activation-atlas/class-activation-atlas.ipynb
 
     Like the Lucid equivalents, our implementation differs slightly from the
     associated research paper.

From 819a0a8c4083d19235d82901d3bb6a0783b30443 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Wed, 27 Jul 2022 11:29:13 -0600
Subject: [PATCH 15/18] Remove `loss_wrapper`

---
 captum/optim/_core/loss.py | 45 +-------------------------------------
 1 file changed, 1 insertion(+), 44 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index c48539151a..90f7f36a1d 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -9,20 +9,6 @@
 from captum.optim._utils.typing import ModuleOutputMapping
 
 
-def _make_arg_str(arg: Any) -> str:
-    """
-    Args:
-
-    args (Any): A set of arguments to covert to a string.
-
-    Returns:
-    args (str): The args in str form.
-    """
-    arg = str(arg)
-    too_big = len(arg) > 15 or "\n" in arg
-    return arg[:15] + "..." if too_big else arg
-
-
 class Loss(ABC):
     """
     Abstract Class to describe loss.
@@ -32,6 +18,7 @@ class Loss(ABC):
 
     def __init__(self) -> None:
         super().__init__()
+        self.__name__ = self.__class__.__name__
 
     @abstractproperty
     def target(self) -> Union[nn.Module, List[nn.Module]]:
@@ -362,22 +349,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
         return self.loss_fn(targets_to_values)
 
 
-def loss_wrapper(cls: Any) -> Callable:
-    """
-    Primarily for naming purposes.
- """ - - @functools.wraps(cls) - def wrapper(*args, **kwargs) -> object: - obj = cls(*args, **kwargs) - args_str = " [" + ", ".join([_make_arg_str(arg) for arg in args]) + "]" - obj.__name__ = cls.__name__ + args_str - return obj - - return wrapper - - -@loss_wrapper class LayerActivation(BaseLoss): """ Maximize activations at the target layer. @@ -409,7 +380,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations -@loss_wrapper class ChannelActivation(BaseLoss): """ Maximize activations at the target layer and target channel. @@ -451,7 +421,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ] -@loss_wrapper class NeuronActivation(BaseLoss): """ This loss maximizes the activations of a target neuron in the specified channel @@ -509,7 +478,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ] -@loss_wrapper class DeepDream(BaseLoss): """ Maximize 'interestingness' at the target layer. @@ -550,7 +518,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations**2 -@loss_wrapper class TotalVariation(BaseLoss): """ Total variation denoising penalty for activations. @@ -587,7 +554,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.sum(torch.abs(x_diff)) + torch.sum(torch.abs(y_diff)) -@loss_wrapper class L1(BaseLoss): """ L1 norm of the target layer, generally used as a penalty. @@ -620,7 +586,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.abs(activations - self.constant).sum() -@loss_wrapper class L2(BaseLoss): """ L2 norm of the target layer, generally used as a penalty. @@ -660,7 +625,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.sqrt(self.eps + activations) -@loss_wrapper class Diversity(BaseLoss): """ Use a cosine similarity penalty to extract features from a polysemantic neuron. @@ -709,7 +673,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ) -@loss_wrapper class ActivationInterpolation(BaseLoss): """ Interpolate between two different layers & channels. @@ -776,7 +739,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return sum_tensor -@loss_wrapper class Alignment(BaseLoss): """ Penalize the L2 distance between tensors in the batch to encourage visual @@ -830,7 +792,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return -sum_tensor -@loss_wrapper class Direction(BaseLoss): """ Visualize a general direction vector. @@ -873,7 +834,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class NeuronDirection(BaseLoss): """ Visualize a single (x, y) position for a direction vector. @@ -940,7 +900,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class AngledNeuronDirection(BaseLoss): """ Visualize a direction vector with an optional whitened activation vector to @@ -1039,7 +998,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return dot * torch.clamp(cossims, min=0.1) ** self.cossim_pow -@loss_wrapper class TensorDirection(BaseLoss): """ Visualize a tensor direction vector. 
@@ -1093,7 +1051,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class ActivationWeights(BaseLoss): """ Apply weights to channels, neurons, or spots in the target. From 61a0be93d6279f3aa09e75e8e1a26c53eed9fbff Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 14:15:44 -0600 Subject: [PATCH 16/18] Fix lint errors --- captum/optim/_core/loss.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 90f7f36a1d..ee47ae0ea5 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1,7 +1,6 @@ -import functools import operator from abc import ABC, abstractmethod, abstractproperty -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -1203,7 +1202,6 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: __all__ = [ "Loss", - "loss_wrapper", "BaseLoss", "CompositeLoss", "LayerActivation", From f2f7ea553d63879e6c3336c96f7e7ef55fc344a1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 19:43:27 -0600 Subject: [PATCH 17/18] Fix typehint mistake --- captum/optim/_core/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index ee47ae0ea5..6ec08391b1 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -806,7 +806,7 @@ def __init__( self, target: nn.Module, vec: torch.Tensor, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ @@ -849,7 +849,7 @@ def __init__( x: Optional[int] = None, y: Optional[int] = None, channel_index: Optional[int] = None, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ @@ -1009,7 +1009,7 @@ def __init__( self, target: nn.Module, vec: torch.Tensor, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ From 03cea17d9ce73bf0abd623dc6f92204c84e3340b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 11 Aug 2022 09:32:14 -0600 Subject: [PATCH 18/18] callable -> Callable --- captum/optim/_core/loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 6ec08391b1..ffd7c8e43d 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -319,7 +319,7 @@ def __init__( """ Args: - loss_fn (callable): A function that takes a dict of captured activations + loss_fn (Callable): A function that takes a dict of captured activations with nn.Modules as keys, and then passes those activations through loss objective(s) & math operations. name (str, optional): The name of all composable operations in the @@ -1142,7 +1142,7 @@ def sum_loss_list( Args: loss_list (list): A list of loss objectives. - to_scalar_fn (callable): A function for converting loss objective outputs to + to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. Set to :class:`torch.nn.Identity` for no reduction op. Default: :func:`torch.mean`
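
Note: a usage sketch for the ``sum_loss_list`` / ``to_scalar_fn`` behavior
documented above. The import path mirrors the file these patches modify, but
it is an assumption, as are the toy model and the channel/batch indices:

    import torch
    import torch.nn as nn

    # Assumed import path, taken from the file touched by this patch series.
    from captum.optim._core.loss import ChannelActivation, sum_loss_list

    # Stand-in for a real vision model (placeholder for illustration only).
    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 16, 3))

    loss_list = [
        ChannelActivation(model[0], channel_index=2),
        ChannelActivation(model[2], channel_index=5, batch_index=0),
    ]

    # to_scalar_fn reduces each objective's tensor output to a scalar so that
    # differently shaped activations can be summed; torch.mean is the
    # documented default.
    combined = sum_loss_list(loss_list, to_scalar_fn=torch.mean)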