From acfd2f71a8def934bb66d05ae6a1c0363846ef1d Mon Sep 17 00:00:00 2001
From: SK
Date: Wed, 8 Dec 2021 00:14:56 +0800
Subject: [PATCH 1/3] Add explanations to losses

---
 captum/optim/_core/loss.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index aa33793642..aed1b33fd4 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -189,6 +189,8 @@ def wrapper(*args, **kwargs) -> object:
 class LayerActivation(BaseLoss):
     """
     Maximize activations at the target layer.
+    This is the most basic loss available; it simply returns the activations in
+    their original form.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -201,6 +203,8 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
 class ChannelActivation(BaseLoss):
     """
     Maximize activations at the target layer and target channel.
+    This loss maximizes the activations of a target channel in a specified target
+    layer, and can be useful to determine what features the channel is excited by.
     """
 
     def __init__(
@@ -224,6 +228,12 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
 
 @loss_wrapper
 class NeuronActivation(BaseLoss):
+    """
+    This loss maximizes the activations of a target neuron in the specified channel
+    from the specified layer. This loss is useful for determining the type of features
+    that excite a neuron, and thus is often used for circuit- and neuron-related
+    research.
+    """
     def __init__(
         self,
         target: nn.Module,
@@ -258,6 +268,10 @@ class DeepDream(BaseLoss):
     """
     Maximize 'interestingness' at the target layer.
     Mordvintsev et al., 2015.
+    https://github.com/google/deepdream
+    This loss returns the squared layer activations. When combined with a negative
+    mean loss summarization, it will create hallucinogenic visuals commonly
+    referred to as 'Deep Dream'.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -272,6 +286,9 @@ class TotalVariation(BaseLoss):
     Total variation denoising penalty for activations.
     See Mahendran, V. 2014. Understanding Deep Image Representations by Inverting
     Them. https://arxiv.org/abs/1412.0035
+    This loss attempts to smooth / denoise the target by performing total variation
+    denoising. The target is most often the image that's being optimized. This loss is
+    often used to remove unwanted visual artifacts.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -334,6 +351,9 @@ class Diversity(BaseLoss):
     Use a cosine similarity penalty to extract features from a polysemantic neuron.
     Olah, Mordvintsev & Schubert, 2017.
     https://distill.pub/2017/feature-visualization/#diversity
+    This loss helps break up polysemantic layers, channels, and neurons by encouraging
+    diversity across the images in a batch. This loss is to be used along with a main
+    loss.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -359,6 +379,8 @@ class ActivationInterpolation(BaseLoss):
     Interpolate between two different layers & channels.
     Olah, Mordvintsev & Schubert, 2017.
     https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons
+    This loss helps to interpolate or mix visualizations from two activations (layer or
+    channel) by computing a linear interpolation between the two activations.
     """
 
     def __init__(
@@ -410,6 +432,9 @@ class Alignment(BaseLoss):
     similarity between them.
     Olah, Mordvintsev & Schubert, 2017.
     https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons
+    When interpolating between activations, it may be desirable to keep image landmarks
+    in the same position for visual comparison. This loss helps to minimize L2 distance
+    between neighbouring images.
     """
 
     def __init__(self, target: nn.Module, decay_ratio: float = 2.0) -> None:
@@ -438,6 +463,10 @@ class Direction(BaseLoss):
     Visualize a general direction vector.
     Carter, et al., "Activation Atlas", Distill, 2019.
     https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images
+    This loss helps to visualize a specific vector direction in a layer, by maximizing
+    the alignment between the input vector and the layer's activation vector. The
+    dimensionality of the vector should correspond to the number of channels in the
+    layer.
     """
 
     def __init__(
@@ -464,6 +493,8 @@ class NeuronDirection(BaseLoss):
     Visualize a single (x, y) position for a direction vector.
     Carter, et al., "Activation Atlas", Distill, 2019.
     https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images
+    Extends Direction loss by focusing on visualizing a single neuron within the
+    kernel.
     """
 
     def __init__(
@@ -505,6 +536,7 @@ class TensorDirection(BaseLoss):
     Visualize a tensor direction vector.
     Carter, et al., "Activation Atlas", Distill, 2019.
     https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images
+    Extends Direction loss by allowing batch-wise direction visualization.
     """
 
     def __init__(
@@ -542,6 +574,8 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
 class ActivationWeights(BaseLoss):
     """
     Apply weights to channels, neurons, or spots in the target.
+    This loss weights specific channels or neurons in a given layer via a weight
+    vector.
     """
 
     def __init__(

From 50dedc4c45646634c6d08d13a5687ed4f3f5578a Mon Sep 17 00:00:00 2001
From: SK
Date: Wed, 29 Dec 2021 00:39:18 +0800
Subject: [PATCH 2/3] Add argument documentation for losses

---
 captum/optim/_core/loss.py | 123 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 2 deletions(-)

diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py
index aed1b33fd4..02db5f6185 100644
--- a/captum/optim/_core/loss.py
+++ b/captum/optim/_core/loss.py
@@ -191,6 +191,12 @@ class LayerActivation(BaseLoss):
     Maximize activations at the target layer.
     This is the most basic loss available; it simply returns the activations in
     their original form.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -205,6 +211,13 @@ class ChannelActivation(BaseLoss):
     Maximize activations at the target layer and target channel.
     This loss maximizes the activations of a target channel in a specified target
     layer, and can be useful to determine what features the channel is excited by.
+
+    Args:
+        target (nn.Module): The layer containing the channel to optimize for.
+        channel_index (int): The index of the channel to optimize for.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __init__(
@@ -233,6 +246,19 @@ class NeuronActivation(BaseLoss):
     from the specified layer. This loss is useful for determining the type of features
     that excite a neuron, and thus is often used for circuit- and neuron-related
     research.
+
+    Args:
+        target (nn.Module): The layer containing the channel to optimize for.
+        channel_index (int): The index of the channel to optimize for.
+        x (int, optional): The x coordinate of the neuron to optimize for. If
+            unspecified, defaults to center, or one unit left of center for even
+            widths.
+        y (int, optional): The y coordinate of the neuron to optimize for. If
+            unspecified, defaults to center, or one unit above center for even
+            heights.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
     def __init__(
         self,
         target: nn.Module,
@@ -271,7 +297,13 @@ class DeepDream(BaseLoss):
     https://github.com/google/deepdream
     This loss returns the squared layer activations. When combined with a negative
     mean loss summarization, it will create hallucinogenic visuals commonly
-    referred to as 'Deep Dream'.
+    referred to as 'Deep Dream'.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -289,6 +321,12 @@ class TotalVariation(BaseLoss):
     This loss attempts to smooth / denoise the target by performing total variation
     denoising. The target is most often the image that's being optimized. This loss is
     often used to remove unwanted visual artifacts.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
@@ -303,6 +341,14 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
 class L1(BaseLoss):
     """
     L1 norm of the target layer, generally used as a penalty.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        constant (float): Constant value to subtract from the activations.
+            Defaults to 0.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __init__(
@@ -324,6 +370,15 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:
 class L2(BaseLoss):
     """
     L2 norm of the target layer, generally used as a penalty.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        constant (float): Constant value to subtract from the activations.
+            Defaults to 0.
+        epsilon (float): Small value to add to L2 prior to sqrt. Defaults to 1e-6.
+        batch_index (int, optional): The index of the image to optimize if we
+            are optimizing a batch of images. If unspecified, defaults to all
+            images in the batch.
     """
 
     def __init__(
@@ -354,6 +409,11 @@ class Diversity(BaseLoss):
     This loss helps break up polysemantic layers, channels, and neurons by encouraging
     diversity across the images in a batch. This loss is to be used along with a main
     loss.
+
+    Args:
+        target (nn.Module): The layer to optimize for.
+        batch_index (int, optional): Unused here since we are optimizing for diversity
+            across the batch.
""" def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -381,6 +441,14 @@ class ActivationInterpolation(BaseLoss): https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons This loss helps to interpolate or mix visualizations from two activations (layer or channel) by interpolating a linear sum between the two activations. + + Args: + target1 (nn.Module): The first layer to optimize for. + channel_index1 (int): Index of channel in first layer to optimize. Defaults to + all channels. + target2 (nn.Module): The first layer to optimize for. + channel_index2 (int): Index of channel in first layer to optimize. Defaults to + all channels. """ def __init__( @@ -434,7 +502,12 @@ class Alignment(BaseLoss): https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons When interpolating between activations, it may be desirable to keep image landmarks in the same position for visual comparison. This loss helps to minimize L2 distance - between neighbouring images. + between neighbouring images. + + Args: + target (nn.Module): The layer to optimize for. + decay_ratio (float): How much to decay penalty as images move apart in batch. + Defaults to 2. """ def __init__(self, target: nn.Module, decay_ratio: float = 2.0) -> None: @@ -467,6 +540,14 @@ class Direction(BaseLoss): the alignment between the input vector and the layer’s activation vector. The dimensionality of the vector should correspond to the number of channels in the layer. + + Args: + target (nn.Module): The layer to optimize for. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + batch_index (int, optional): The index of the image to optimize if we + optimizing a batch of images. If unspecified, defaults to all images + in the batch. """ def __init__( @@ -495,6 +576,21 @@ class NeuronDirection(BaseLoss): https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by focusing on visualizing a single neuron within the kernel. + + Args: + target (nn.Module): The layer to optimize for. + vec (torch.Tensor): Vector representing direction to align to. + x (int, optional): The x coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit left of center for even + lengths. + y (int, optional): The y coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit up of center for even + heights. + channel_index (int): The index of the channel to optimize for. + cossim_pow (float, optional): The desired cosine similarity power to use. + batch_index (int, optional): The index of the image to optimize if we + optimizing a batch of images. If unspecified, defaults to all images + in the batch. """ def __init__( @@ -537,6 +633,14 @@ class TensorDirection(BaseLoss): Carter, et al., "Activation Atlas", Distill, 2019. https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by allowing batch-wise direction visualization. + + Args: + target (nn.Module): The layer to optimize for. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + batch_index (int, optional): The index of the image to optimize if we + optimizing a batch of images. If unspecified, defaults to all images + in the batch. 
""" def __init__( @@ -576,6 +680,21 @@ class ActivationWeights(BaseLoss): Apply weights to channels, neurons, or spots in the target. This loss weighs specific channels or neurons in a given layer, via a weight vector. + + Args: + target (nn.Module): The layer to optimize for. + weights (torch.Tensor): Weights to apply to targets. + neuron (bool): Whether target is a neuron. Defaults to False. + x (int, optional): The x coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit left of center for even + lengths. + y (int, optional): The y coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit up of center for even + heights. + wx (int, optional): Length of neurons to apply the weights to, along the + x-axis. + wy (int, optional): Length of neurons to apply the weights to, along the + y-axis. """ def __init__( From 61c99daa3b3573e8fb16b9b67ba79e0d1651925a Mon Sep 17 00:00:00 2001 From: SK Date: Wed, 29 Dec 2021 00:59:54 +0800 Subject: [PATCH 3/3] Lint fix --- captum/optim/_core/loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 02db5f6185..ecb82fa72f 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -260,6 +260,7 @@ class NeuronActivation(BaseLoss): optimizing a batch of images. If unspecified, defaults to all images in the batch. """ + def __init__( self, target: nn.Module,