From 34bedcf91238b19ab1730d762fdceb1d9c06c8dd Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 26 Apr 2022 13:06:44 -0600 Subject: [PATCH 001/174] Add the CLIP ResNet 50x4 model --- captum/optim/models/__init__.py | 9 + .../models/_image/clip_resnet50x4_image.py | 369 ++++++++++++++++++ .../models/_image/clip_resnet50x4_text.py | 187 +++++++++ .../models/test_clip_resnet50x4_image.py | 146 +++++++ .../optim/models/test_clip_resnet50x4_text.py | 64 +++ 5 files changed, 775 insertions(+) create mode 100644 captum/optim/models/_image/clip_resnet50x4_image.py create mode 100644 captum/optim/models/_image/clip_resnet50x4_text.py create mode 100644 tests/optim/models/test_clip_resnet50x4_image.py create mode 100644 tests/optim/models/test_clip_resnet50x4_text.py diff --git a/captum/optim/models/__init__.py b/captum/optim/models/__init__.py index a970e68ec4..77ffed8e6e 100755 --- a/captum/optim/models/__init__.py +++ b/captum/optim/models/__init__.py @@ -6,9 +6,14 @@ replace_layers, skip_layers, ) +from ._image.clip_resnet50x4_image import CLIP_ResNet50x4Image # noqa: F401 +from ._image.clip_resnet50x4_image import clip_resnet50x4_image # noqa: F401 +from ._image.clip_resnet50x4_text import CLIP_ResNet50x4Text # noqa: F401 +from ._image.clip_resnet50x4_text import clip_resnet50x4_text # noqa: F401 from ._image.inception5h_classes import INCEPTION5H_CLASSES # noqa: F401 from ._image.inception_v1 import InceptionV1, googlenet # noqa: F401 + __all__ = [ "RedirectedReluLayer", "SkipLayer", @@ -19,4 +24,8 @@ "InceptionV1", "googlenet", "INCEPTION5H_CLASSES", + "CLIP_ResNet50x4Image", + "clip_resnet50x4_image", + "CLIP_ResNet50x4Text", + "clip_resnet50x4_text", ] diff --git a/captum/optim/models/_image/clip_resnet50x4_image.py b/captum/optim/models/_image/clip_resnet50x4_image.py new file mode 100644 index 0000000000..b64b9b0699 --- /dev/null +++ b/captum/optim/models/_image/clip_resnet50x4_image.py @@ -0,0 +1,369 @@ +from typing import Optional, Type +from warnings import 
warn + +import torch +from torch import nn + +from captum.optim.models._common import RedirectedReluLayer, SkipLayer + +GS_SAVED_WEIGHTS_URL = ( + "https://pytorch.s3.amazonaws.com/models/captum/clip_resnet50x4_image.pt" +) + + +def clip_resnet50x4_image( + pretrained: bool = False, + progress: bool = True, + model_path: Optional[str] = None, + **kwargs +) -> "CLIP_ResNet50x4Image": + """ + The visual portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable + Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 + + This model can be combined with the CLIP ResNet 50x4 Text model to create the full + CLIP ResNet 50x4 model. + + AvgPool2d layers were replaced with AdaptiveAvgPool2d to allow for any input height + and width size, though the best results are obtained by using the model's intended + input height and width of 288x288. + + See here for more details: + https://github.com/openai/CLIP + https://github.com/mlfoundations/open_clip + + Args: + + pretrained (bool, optional): If True, returns a pre-trained model. + Default: False + progress (bool, optional): If True, displays a progress bar of the download to + stderr + Default: True + model_path (str, optional): Optional path for the model file. + Default: None + replace_relus_with_redirectedrelu (bool, optional): If True, return pretrained + model with Redirected ReLU in place of ReLU layers. + Default: *True* when pretrained is True otherwise *False* + use_linear_modules_only (bool, optional): If True, return model + with all nonlinear layers replaced with linear equivalents. + Default: False + transform_input (bool, optional): If True, preprocesses the input according to + the method with which it was trained. + Default: *True* when pretrained is True otherwise *False* + + Returns: + **CLIP_ResNet50x4Image** (CLIP_ResNet50x4Image): A CLIP ResNet 50x4 model's + image portion. 
+ """ + if pretrained: + if "transform_input" not in kwargs: + kwargs["transform_input"] = True + if "replace_relus_with_redirectedrelu" not in kwargs: + kwargs["replace_relus_with_redirectedrelu"] = True + if "use_linear_modules_only" not in kwargs: + kwargs["use_linear_modules_only"] = False + + model = CLIP_ResNet50x4Image(**kwargs) + + if model_path is None: + state_dict = torch.hub.load_state_dict_from_url( + GS_SAVED_WEIGHTS_URL, progress=progress, check_hash=False + ) + else: + state_dict = torch.load(model_path, map_location="cpu") + model.load_state_dict(state_dict) + return model + + return CLIP_ResNet50x4Image(**kwargs) + + +class CLIP_ResNet50x4Image(nn.Module): + """ + The visual portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable + Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 + """ + __constants__ = ["transform_input"] + + def __init__( + self, + transform_input: bool = False, + replace_relus_with_redirectedrelu: bool = False, + use_linear_modules_only: bool = False, + ) -> None: + """ + Args: + + replace_relus_with_redirectedrelu (bool, optional): If True, return + model with Redirected ReLU in place of ReLU layers. + Default: False + use_linear_modules_only (bool, optional): If True, return model with + all nonlinear layers replaced with linear equivalents. + Default: False + transform_input (bool, optional): If True, preprocesses the input according + to the method with which it was trained on. 
+ Default: False + """ + super().__init__() + if use_linear_modules_only: + activ = SkipLayer + else: + if replace_relus_with_redirectedrelu: + activ = RedirectedReluLayer + else: + activ = nn.ReLU + + self.transform_input = transform_input + + # Stem layers + self.conv1 = nn.Conv2d(3, 40, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(40) + self.relu1 = activ() + self.conv2 = nn.Conv2d(40, 40, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(40) + self.relu2 = activ() + self.conv3 = nn.Conv2d(40, 80, kernel_size=3, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(80) + self.relu3 = activ() + self.avgpool = nn.AdaptiveAvgPool2d(72) + + # Residual layers + self.layer1 = self._build_layer(80, 80, 4, stride=1, pooling=72, activ=activ) + self.layer2 = self._build_layer(320, 160, 6, stride=2, pooling=36, activ=activ) + self.layer3 = self._build_layer(640, 320, 10, stride=2, pooling=18, activ=activ) + self.layer4 = self._build_layer(1280, 640, 6, stride=2, pooling=9, activ=activ) + + # Attention Pooling + self.attnpool = AttentionPool2d(9, 2560, out_features=640, num_heads=40) + + def _build_layer( + self, + inplanes: int = 80, + planes: int = 80, + blocks: int = 4, + stride: int = 1, + pooling: int = 72, + activ: Type[nn.Module] = nn.ReLU, + ) -> nn.Module: + """ + Residual layer creation helper function. + + Args: + + inplanes (int, optional): The number of input channels / features to use + for the first layer. + Default: 80 + planes (int, optional): The number of output channels / features to use + for the first layer. This variable is then multiplied by 4 to get the + number of input channels / features to use for the subsequent layers. + Default: 80 + blocks (int, optional): The number of Bottleneck layers to create. + Default: 4 + stride (int, optional): The stride value to use for the Bottleneck layers. + Default: 1 + pooling (int, optional): The output size used for nn.AdaptiveAvgPool2d. 
+ Default: 72 + activ (type of nn.Module, optional): The nn.Module class type to use for + activation layers. + Default: nn.ReLU + + Returns: + residual_layer (nn.Sequential): A full residual layer. + """ + layers = [Bottleneck(inplanes, planes, stride, pooling=pooling, activ=activ)] + for _ in range(blocks - 1): + layers += [Bottleneck(planes * 4, planes, pooling=pooling, activ=activ)] + return nn.Sequential(*layers) + + def _transform_input(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to normalize the values of. + + Returns: + x (torch.Tensor): A normalized tensor. + """ + assert x.dim() == 3 or x.dim() == 4 + if self.transform_input: + if x.min() < 0.0 or x.max() > 1.0: + warn("Model input has values outside of the range [0, 1].") + x = x.unsqueeze(0) if x.dim() == 3 else x + x = x - torch.tensor( + [0.48145466, 0.4578275, 0.40821073], device=x.device + ).view(3, 1, 1) + x = x / torch.tensor( + [0.26862954, 0.26130258, 0.27577711], device=x.device + ).view(3, 1, 1) + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the model. + + Returns: + x (torch.Tensor): The model output. + """ + x = self._transform_input(x) + + # Stem layers + x = self.relu1(self.bn1(self.conv1(x))) + x = self.relu2(self.bn2(self.conv2(x))) + x = self.relu3(self.bn3(self.conv3(x))) + x = self.avgpool(x) + + # Residual layers + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + # Attention Pooling + x = self.attnpool(x) + return x + + +class Bottleneck(nn.Module): + def __init__( + self, + inplanes: int = 80, + planes: int = 80, + stride: int = 1, + pooling: int = 72, + activ: Type[nn.Module] = nn.ReLU, + ) -> None: + """ + Args: + + inplanes (int, optional): The number of input channels / features to use + for the first layer. 
+ Default: 80 + planes (int, optional): The number of output channels / features to use + for the subsequent layers. + Default: 80 + stride (int, optional): The stride value to use for the Bottleneck layers. + Default: 1 + pooling (int, optional): The output size used for nn.AdaptiveAvgPool2d. + Default: 72 + activ (type of nn.Module, optional): The nn.Module class type to use for + activation layers. + Default: nn.ReLU + """ + super().__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.relu1 = activ() + + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.relu2 = activ() + + self.avgpool = nn.AdaptiveAvgPool2d(pooling) + + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu3 = activ() + + if stride > 1 or inplanes != planes * 4: + self.downsample = nn.Sequential( + nn.AdaptiveAvgPool2d(pooling), + nn.Conv2d(inplanes, planes * 4, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(planes * 4), + ) + else: + self.downsample = None + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the module. + + Returns: + x (torch.Tensor): The module output. + """ + assert x.dim() == 4 + if self.downsample is not None: + identity = self.downsample(x) + else: + identity = x.clone() + + x = self.relu1(self.bn1(self.conv1(x))) + x = self.relu2(self.bn2(self.conv2(x))) + x = self.avgpool(x) + + x = self.bn3(self.conv3(x)) + identity + x = self.relu3(x) + return x + + +class AttentionPool2d(nn.Module): + def __init__( + self, + spacial_size: int = 9, + in_features: int = 2560, + out_features: int = 640, + num_heads: int = 40, + ) -> None: + """ + Args: + + spacial_size (int, optional): The desired size to user for the positional + embedding. 
+ Default: 9 + in_features (int, optional): The desired input size for the nn.Linear + layers. + Default: 2560 + out_features (int, optional): The desired output size for the nn.Linear + layers. + num_heads (int, optional): The number of heads to use. + Default: 40 + """ + super().__init__() + self.positional_embedding = nn.Parameter( + torch.randn(spacial_size**2 + 1, in_features) / in_features**0.5 + ) + self.k_proj = nn.Linear(in_features, in_features) + self.q_proj = nn.Linear(in_features, in_features) + self.v_proj = nn.Linear(in_features, in_features) + self.c_proj = nn.Linear(in_features, out_features) + self.num_heads = num_heads + + @torch.jit.ignore + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the module. + + Returns: + x (torch.Tensor): The module output. + """ + assert x.dim() == 4 + x = x.reshape(*x.shape[:2], -1).permute(2, 0, 1) + x = torch.cat([x.mean(dim=0, keepdim=True), x], dim=0) + x = x + self.positional_embedding[:, None, :] + return torch.nn.functional.multi_head_attention_forward( + query=x, + key=x, + value=x, + embed_dim_to_check=x.shape[-1], + num_heads=self.num_heads, + q_proj_weight=self.q_proj.weight, + k_proj_weight=self.k_proj.weight, + v_proj_weight=self.v_proj.weight, + in_proj_weight=None, + in_proj_bias=torch.cat( + [self.q_proj.bias, self.k_proj.bias, self.v_proj.bias] + ), + bias_k=None, + bias_v=None, + add_zero_attn=False, + dropout_p=0.0, + out_proj_weight=self.c_proj.weight, + out_proj_bias=self.c_proj.bias, + use_separate_proj_weight=True, + training=self.training, + need_weights=False, + )[0][0] diff --git a/captum/optim/models/_image/clip_resnet50x4_text.py b/captum/optim/models/_image/clip_resnet50x4_text.py new file mode 100644 index 0000000000..8069b8d74c --- /dev/null +++ b/captum/optim/models/_image/clip_resnet50x4_text.py @@ -0,0 +1,187 @@ +from typing import Optional + +import math +import torch +from torch import nn + + 
+GS_SAVED_WEIGHTS_URL = ( + "https://pytorch.s3.amazonaws.com/models/captum/clip_resnet50x4_text.pt" +) + + +def clip_resnet50x4_text( + pretrained: bool = False, + progress: bool = True, + model_path: Optional[str] = None, + **kwargs +) -> "CLIP_ResNet50x4Text": + """ + The text portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable + Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 + + This model can be combined with the CLIP ResNet 50x4 Image model to create the full + CLIP ResNet 50x4 model. + + See here for more details: + https://github.com/openai/CLIP + https://github.com/mlfoundations/open_clip + + Args: + + pretrained (bool, optional): If True, returns a pre-trained model. + Default: False + progress (bool, optional): If True, displays a progress bar of the download to + stderr + Default: True + model_path (str, optional): Optional path for the model file. + Default: None + width (int, optional): The desired width size to use for the model. + Default: 640 + num_heads (int, optional): The number of heads to use for the model. + Default: 10 + num_residual_layers (int, optional): The number of residual layers to use for + each residual attention block in the model. + Default: 12 + content_length (int, optional): The expected size of text inputs to the model. + Default: 77 + vocab_size (int, optional): The size of the vocab used to train the model. + Default: 49408 + + Returns: + **CLIP_ResNet50x4Text** (CLIP_ResNet50x4Text): A CLIP ResNet 50x4 model's text + portion. 
+ """ + if pretrained: + model = CLIP_ResNet50x4Text(**kwargs) + + if model_path is None: + state_dict = torch.hub.load_state_dict_from_url( + GS_SAVED_WEIGHTS_URL, progress=progress, check_hash=False + ) + else: + state_dict = torch.load(model_path, map_location="cpu") + model.load_state_dict(state_dict) + return model + + return CLIP_ResNet50x4Text(**kwargs) + + +class CLIP_ResNet50x4Text(nn.Module): + """ + The text portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable + Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 + """ + def __init__( + self, + width: int = 640, + num_heads: int = 10, + num_residual_layers: int = 12, + content_length: int = 77, + vocab_size: int = 49408, + ) -> None: + """ + Args: + + width (int, optional): The desired width size to use for the model. + Default: 640 + num_heads (int, optional): The num number of heads to use for the model. + Default: 10 + num_residual_layers (int, optional): The number of residual layers to use + for each residual attention block. + Default: 12 + content_length (int, optional): The expected size of text inputs to the + model. + Default: 77 + vocab_size (int, optional): The size of the vocab used to train the model. + Default: 49408 + """ + super().__init__() + self.transformer = nn.Sequential( + *[ + ResidualAttentionBlock(width, num_heads, content_length) + for _ in range(num_residual_layers) + ] + ) + self.token_embedding = nn.Embedding(vocab_size, width) + self.positional_embedding = nn.Parameter(torch.empty(content_length, width)) + self.ln_final = nn.LayerNorm(width) + self.text_projection = nn.Parameter(torch.empty(width, width)) + + # logit_scale is only used when combining Text & Image models + self.logit_scale = nn.Parameter(torch.ones([]) * math.log(1 / 0.07)) + + def forward(self, text: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the model. + + Returns: + x (torch.Tensor): The model output. 
+ """ + x = self.token_embedding(text) + x = x + self.positional_embedding.to(device=x.device, dtype=x.dtype) + x = self.transformer(x.permute(1, 0, 2)).permute(1, 0, 2) + x = self.ln_final(x) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] + return x @ self.text_projection.to(device=x.device, dtype=x.dtype) + + +class QuickGELU(nn.Module): + """ + OpenAI's models use a slightly different GELU than PyTorch's default GELU. + """ + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the module. + + Returns: + x (torch.Tensor): The module output. + """ + return x * torch.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Module): + def __init__( + self, width: int = 640, num_heads: int = 10, content_length: int = 77 + ) -> None: + """ + Args: + + width (int, optional): The desired width size to use. + Default: 640 + num_heads (int, optional): The num number of heads to use. + Default: 10 + content_length (int, optional): The desired content_length to use. + Default: 77 + """ + super().__init__() + self.attn = nn.MultiheadAttention(width, num_heads) + self.ln_1 = nn.LayerNorm(width) + self.mlp = nn.Sequential( + nn.Linear(width, width * 4), QuickGELU(), nn.Linear(width * 4, width) + ) + self.ln_2 = nn.LayerNorm(width) + self.attn_mask = ( + torch.empty(content_length, content_length).fill_(float("-inf")).triu_(1) + ) + + def attention(self, x: torch.Tensor) -> torch.Tensor: + attn_mask = self.attn_mask.to(device=x.device, dtype=x.dtype) + return self.attn(x, x, x, need_weights=False, attn_mask=attn_mask)[0] + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): An input tensor to run through the module. + + Returns: + x (torch.Tensor): The module output. 
+ """ + x = x + self.attention(self.ln_1(x)) + return x + self.mlp(self.ln_2(x)) diff --git a/tests/optim/models/test_clip_resnet50x4_image.py b/tests/optim/models/test_clip_resnet50x4_image.py new file mode 100644 index 0000000000..aae050646f --- /dev/null +++ b/tests/optim/models/test_clip_resnet50x4_image.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +import unittest +from typing import Type + +import torch + +from captum.optim.models import clip_resnet50x4_image +from captum.optim.models._common import RedirectedReluLayer, SkipLayer +from tests.helpers.basic import BaseTest, assertTensorAlmostEqual +from tests.optim.helpers.models import check_layer_in_model + + +class TestCLIPResNet50x4Image(BaseTest): + def test_load_clip_resnet50x4_image_with_redirected_relu(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping load pretrained CLIP ResNet 50x4 Image due to insufficient" + + " Torch version." + ) + model = clip_resnet50x4_image( + pretrained=True, replace_relus_with_redirectedrelu=True + ) + self.assertTrue(check_layer_in_model(model, RedirectedReluLayer)) + + def test_load_clip_resnet50x4_image_no_redirected_relu(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping load pretrained CLIP ResNet 50x4 Image RedirectedRelu test" + + " due to insufficient Torch version." + ) + model = clip_resnet50x4_image( + pretrained=True, replace_relus_with_redirectedrelu=False + ) + self.assertFalse(check_layer_in_model(model, RedirectedReluLayer)) + self.assertTrue(check_layer_in_model(model, torch.nn.ReLU)) + + def test_load_clip_resnet50x4_image_linear(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping load pretrained CLIP ResNet 50x4 Image linear test due to" + + " insufficient Torch version." 
+ ) + model = clip_resnet50x4_image(pretrained=True, use_linear_modules_only=True) + self.assertFalse(check_layer_in_model(model, RedirectedReluLayer)) + self.assertFalse(check_layer_in_model(model, torch.nn.ReLU)) + self.assertTrue(check_layer_in_model(model, SkipLayer)) + + def test_clip_resnet50x4_image_transform(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping CLIP ResNet 50x4 Image internal transform test due to" + + " insufficient Torch version." + ) + x = torch.randn(1, 3, 288, 288).clamp(0, 1) + model = clip_resnet50x4_image(pretrained=True) + output = model._transform_input(x) + expected_output = x.clone() - torch.tensor( + [0.48145466, 0.4578275, 0.40821073] + ).view(3, 1, 1) + expected_output = expected_output / torch.tensor( + [0.26862954, 0.26130258, 0.27577711] + ).view(3, 1, 1) + assertTensorAlmostEqual(self, output, expected_output, 0) + + def test_clip_resnet50x4_image_transform_warning(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping CLIP ResNet 50x4 Image internal transform warning test due" + + " to insufficient Torch version." + ) + x = torch.stack( + [torch.ones(3, 112, 112) * -1, torch.ones(3, 112, 112) * 2], dim=0 + ) + model = clip_resnet50x4_image(pretrained=True) + with self.assertWarns(UserWarning): + model._transform_input(x) + + def test_clip_resnet50x4_image_load_and_forward(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping basic pretrained CLIP ResNet 50x4 Image forward test due to" + + " insufficient Torch version." + ) + x = torch.zeros(1, 3, 288, 288) + model = clip_resnet50x4_image(pretrained=True) + output = model(x) + self.assertEqual(list(output.shape), [1, 640]) + + def test_untrained_clip_resnet50x4_image_load_and_forward(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping basic untrained CLIP ResNet 50x4 Image forward test due to" + + " insufficient Torch version." 
+ ) + x = torch.zeros(1, 3, 288, 288) + model = clip_resnet50x4_image(pretrained=False) + output = model(x) + self.assertEqual(list(output.shape), [1, 640]) + + def test_clip_resnet50x4_image_load_and_forward_diff_sizes(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Image forward with different" + + " sized inputs test due to insufficient Torch version." + ) + x = torch.zeros(1, 3, 512, 512) + x2 = torch.zeros(1, 3, 126, 224) + model = clip_resnet50x4_image(pretrained=True) + + output = model(x) + output2 = model(x2) + + self.assertEqual(list(output.shape), [1, 640]) + self.assertEqual(list(output2.shape), [1, 640]) + + def test_clip_resnet50x4_image_forward_cuda(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Image forward CUDA test due to" + + " insufficient Torch version." + ) + if not torch.cuda.is_available(): + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Image forward CUDA test due to" + + " not supporting CUDA." + ) + x = torch.zeros(1, 3, 224, 224).cuda() + model = clip_resnet50x4_image(pretrained=True).cuda() + output = model(x) + + self.assertTrue(output.is_cuda) + self.assertEqual(list(output.shape), [1, 640]) + + def test_clip_resnet50x4_image_jit_module_no_redirected_relu(self) -> None: + if torch.__version__ <= "1.8.0": + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Image load & JIT module with" + + " no redirected relu test due to insufficient Torch version." 
+ ) + x = torch.zeros(1, 3, 224, 224) + model = clip_resnet50x4_image( + pretrained=True, replace_relus_with_redirectedrelu=False + ) + jit_model = torch.jit.script(model) + output = jit_model(x) + self.assertEqual(list(output.shape), [1, 640]) diff --git a/tests/optim/models/test_clip_resnet50x4_text.py b/tests/optim/models/test_clip_resnet50x4_text.py new file mode 100644 index 0000000000..69352ca277 --- /dev/null +++ b/tests/optim/models/test_clip_resnet50x4_text.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +import unittest + +import torch + +from captum.optim.models import clip_resnet50x4_text +from tests.helpers.basic import BaseTest, assertTensorAlmostEqual + + +class TestCLIPResNet50x4Text(BaseTest): + def test_clip_resnet50x4_text_logit_scale(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping basic pretrained CLIP ResNet 50x4 Text logit scale test due" + + " to insufficient Torch version." + ) + model = clip_resnet50x4_text(pretrained=True) + expected_logit_scale = torch.tensor([4.605170249938965]) + assertTensorAlmostEqual(self, model.logit_scale, expected_logit_scale) + + def test_clip_resnet50x4_text_load_and_forward(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping basic pretrained CLIP ResNet 50x4 Text forward test due to" + + " insufficient Torch version." + ) + # Start & End tokens: 49405, 49406 + x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]) + x = x.int()[None, :] + model = clip_resnet50x4_text(pretrained=True) + output = model(x) + self.assertEqual(list(output.shape), [1, 640]) + + def test_clip_resnet50x4_text_forward_cuda(self) -> None: + if torch.__version__ <= "1.6.0": + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Text forward CUDA test due to" + + " insufficient Torch version." 
+ ) + if not torch.cuda.is_available(): + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Text forward CUDA test due to" + + " not supporting CUDA." + ) + x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]).cuda() + x = x.int()[None, :] + model = clip_resnet50x4_text(pretrained=True).cuda() + output = model(x) + + self.assertTrue(output.is_cuda) + self.assertEqual(list(output.shape), [1, 640]) + + def test_clip_resnet50x4_text_jit_module(self) -> None: + if torch.__version__ <= "1.8.0": + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Text load & JIT module" + + " test due to insufficient Torch version." + ) + x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]) + x = x.int()[None, :] + model = clip_resnet50x4_text(pretrained=True) + jit_model = torch.jit.script(model) + output = jit_model(x) + self.assertEqual(list(output.shape), [1, 640]) From 599d8e1eb668b4c942183eabc053ce11c271c478 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 13 May 2022 15:21:58 -0600 Subject: [PATCH 002/174] Update CLIP model for new testing & linting --- .../models/_image/clip_resnet50x4_image.py | 37 +++++------ .../models/_image/clip_resnet50x4_text.py | 9 +-- .../models/test_clip_resnet50x4_image.py | 62 +++++++++++-------- .../optim/models/test_clip_resnet50x4_text.py | 18 +++--- 4 files changed, 65 insertions(+), 61 deletions(-) diff --git a/captum/optim/models/_image/clip_resnet50x4_image.py b/captum/optim/models/_image/clip_resnet50x4_image.py index b64b9b0699..4fc86a8880 100644 --- a/captum/optim/models/_image/clip_resnet50x4_image.py +++ b/captum/optim/models/_image/clip_resnet50x4_image.py @@ -1,9 +1,8 @@ -from typing import Optional, Type +from typing import Any, Optional, Type from warnings import warn import torch -from torch import nn - +import torch.nn as nn from captum.optim.models._common import RedirectedReluLayer, SkipLayer GS_SAVED_WEIGHTS_URL = ( @@ -15,7 +14,7 @@ def clip_resnet50x4_image( 
pretrained: bool = False, progress: bool = True, model_path: Optional[str] = None, - **kwargs + **kwargs: Any, ) -> "CLIP_ResNet50x4Image": """ The visual portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable @@ -24,9 +23,8 @@ def clip_resnet50x4_image( This model can be combined with the CLIP ResNet 50x4 Text model to create the full CLIP ResNet 50x4 model. - AvgPool2d layers were replaced with AdaptiveAvgPool2d to allow for any input height - and width size, though the best results are obtained by using the model's intended - input height and width of 288x288. + Note that model inputs are expected to have a shape of: [B, 3, 288, 288] or + [3, 288, 288]. See here for more details: https://github.com/openai/CLIP @@ -82,6 +80,7 @@ class CLIP_ResNet50x4Image(nn.Module): The visual portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 """ + __constants__ = ["transform_input"] def __init__( @@ -124,13 +123,13 @@ def __init__( self.conv3 = nn.Conv2d(40, 80, kernel_size=3, padding=1, bias=False) self.bn3 = nn.BatchNorm2d(80) self.relu3 = activ() - self.avgpool = nn.AdaptiveAvgPool2d(72) + self.avgpool = nn.AvgPool2d(2) # Residual layers - self.layer1 = self._build_layer(80, 80, 4, stride=1, pooling=72, activ=activ) - self.layer2 = self._build_layer(320, 160, 6, stride=2, pooling=36, activ=activ) - self.layer3 = self._build_layer(640, 320, 10, stride=2, pooling=18, activ=activ) - self.layer4 = self._build_layer(1280, 640, 6, stride=2, pooling=9, activ=activ) + self.layer1 = self._build_layer(80, 80, blocks=4, stride=1, activ=activ) + self.layer2 = self._build_layer(320, 160, blocks=6, stride=2, activ=activ) + self.layer3 = self._build_layer(640, 320, blocks=10, stride=2, activ=activ) + self.layer4 = self._build_layer(1280, 640, blocks=6, stride=2, activ=activ) # Attention Pooling self.attnpool = AttentionPool2d(9, 2560, out_features=640, num_heads=40) @@ 
-141,7 +140,6 @@ def _build_layer( planes: int = 80, blocks: int = 4, stride: int = 1, - pooling: int = 72, activ: Type[nn.Module] = nn.ReLU, ) -> nn.Module: """ @@ -160,8 +158,6 @@ def _build_layer( Default: 4 stride (int, optional): The stride value to use for the Bottleneck layers. Default: 1 - pooling (int, optional): The output size used for nn.AdaptiveAvgPool2d. - Default: 72 activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. Default: nn.ReLU @@ -169,9 +165,9 @@ def _build_layer( Returns: residual_layer (nn.Sequential): A full residual layer. """ - layers = [Bottleneck(inplanes, planes, stride, pooling=pooling, activ=activ)] + layers = [Bottleneck(inplanes, planes, stride, activ=activ)] for _ in range(blocks - 1): - layers += [Bottleneck(planes * 4, planes, pooling=pooling, activ=activ)] + layers += [Bottleneck(planes * 4, planes, activ=activ)] return nn.Sequential(*layers) def _transform_input(self, x: torch.Tensor) -> torch.Tensor: @@ -230,7 +226,6 @@ def __init__( inplanes: int = 80, planes: int = 80, stride: int = 1, - pooling: int = 72, activ: Type[nn.Module] = nn.ReLU, ) -> None: """ @@ -244,8 +239,6 @@ def __init__( Default: 80 stride (int, optional): The stride value to use for the Bottleneck layers. Default: 1 - pooling (int, optional): The output size used for nn.AdaptiveAvgPool2d. - Default: 72 activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. 
Default: nn.ReLU @@ -259,7 +252,7 @@ def __init__( self.bn2 = nn.BatchNorm2d(planes) self.relu2 = activ() - self.avgpool = nn.AdaptiveAvgPool2d(pooling) + self.avgpool = nn.AvgPool2d(stride) if stride > 1 else nn.Identity() self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) @@ -267,7 +260,7 @@ def __init__( if stride > 1 or inplanes != planes * 4: self.downsample = nn.Sequential( - nn.AdaptiveAvgPool2d(pooling), + nn.AvgPool2d(stride), nn.Conv2d(inplanes, planes * 4, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(planes * 4), ) diff --git a/captum/optim/models/_image/clip_resnet50x4_text.py b/captum/optim/models/_image/clip_resnet50x4_text.py index 8069b8d74c..66cb58ce69 100644 --- a/captum/optim/models/_image/clip_resnet50x4_text.py +++ b/captum/optim/models/_image/clip_resnet50x4_text.py @@ -1,8 +1,8 @@ -from typing import Optional - import math +from typing import Any, Optional + import torch -from torch import nn +import torch.nn as nn GS_SAVED_WEIGHTS_URL = ( @@ -14,7 +14,7 @@ def clip_resnet50x4_text( pretrained: bool = False, progress: bool = True, model_path: Optional[str] = None, - **kwargs + **kwargs: Any, ) -> "CLIP_ResNet50x4Text": """ The text portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable @@ -72,6 +72,7 @@ class CLIP_ResNet50x4Text(nn.Module): The text portion of OpenAI's ResNet 50x4 CLIP model from 'Learning Transferable Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 """ + def __init__( self, width: int = 640, diff --git a/tests/optim/models/test_clip_resnet50x4_image.py b/tests/optim/models/test_clip_resnet50x4_image.py index aae050646f..beb3d33595 100644 --- a/tests/optim/models/test_clip_resnet50x4_image.py +++ b/tests/optim/models/test_clip_resnet50x4_image.py @@ -1,18 +1,17 @@ #!/usr/bin/env python3 import unittest -from typing import Type import torch - from captum.optim.models import clip_resnet50x4_image from 
captum.optim.models._common import RedirectedReluLayer, SkipLayer +from packaging import version from tests.helpers.basic import BaseTest, assertTensorAlmostEqual from tests.optim.helpers.models import check_layer_in_model class TestCLIPResNet50x4Image(BaseTest): def test_load_clip_resnet50x4_image_with_redirected_relu(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained CLIP ResNet 50x4 Image due to insufficient" + " Torch version." @@ -23,7 +22,7 @@ def test_load_clip_resnet50x4_image_with_redirected_relu(self) -> None: self.assertTrue(check_layer_in_model(model, RedirectedReluLayer)) def test_load_clip_resnet50x4_image_no_redirected_relu(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained CLIP ResNet 50x4 Image RedirectedRelu test" + " due to insufficient Torch version." @@ -35,7 +34,7 @@ def test_load_clip_resnet50x4_image_no_redirected_relu(self) -> None: self.assertTrue(check_layer_in_model(model, torch.nn.ReLU)) def test_load_clip_resnet50x4_image_linear(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained CLIP ResNet 50x4 Image linear test due to" + " insufficient Torch version." @@ -46,7 +45,7 @@ def test_load_clip_resnet50x4_image_linear(self) -> None: self.assertTrue(check_layer_in_model(model, SkipLayer)) def test_clip_resnet50x4_image_transform(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping CLIP ResNet 50x4 Image internal transform test due to" + " insufficient Torch version." 
@@ -63,20 +62,20 @@ def test_clip_resnet50x4_image_transform(self) -> None: assertTensorAlmostEqual(self, output, expected_output, 0) def test_clip_resnet50x4_image_transform_warning(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping CLIP ResNet 50x4 Image internal transform warning test due" + " to insufficient Torch version." ) x = torch.stack( - [torch.ones(3, 112, 112) * -1, torch.ones(3, 112, 112) * 2], dim=0 + [torch.ones(3, 288, 288) * -1, torch.ones(3, 288, 288) * 2], dim=0 ) model = clip_resnet50x4_image(pretrained=True) with self.assertWarns(UserWarning): model._transform_input(x) def test_clip_resnet50x4_image_load_and_forward(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping basic pretrained CLIP ResNet 50x4 Image forward test due to" + " insufficient Torch version." @@ -87,7 +86,7 @@ def test_clip_resnet50x4_image_load_and_forward(self) -> None: self.assertEqual(list(output.shape), [1, 640]) def test_untrained_clip_resnet50x4_image_load_and_forward(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping basic untrained CLIP ResNet 50x4 Image forward test due to" + " insufficient Torch version." @@ -97,24 +96,21 @@ def test_untrained_clip_resnet50x4_image_load_and_forward(self) -> None: output = model(x) self.assertEqual(list(output.shape), [1, 640]) - def test_clip_resnet50x4_image_load_and_forward_diff_sizes(self) -> None: - if torch.__version__ <= "1.6.0": + def test_clip_resnet50x4_image_warning(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( - "Skipping pretrained CLIP ResNet 50x4 Image forward with different" - + " sized inputs test due to insufficient Torch version." 
+ "Skipping pretrained CLIP ResNet 50x4 Image transform input" + + " warning test due to insufficient Torch version." ) - x = torch.zeros(1, 3, 512, 512) - x2 = torch.zeros(1, 3, 126, 224) + x = torch.stack( + [torch.ones(3, 288, 288) * -1, torch.ones(3, 288, 288) * 2], dim=0 + ) model = clip_resnet50x4_image(pretrained=True) - - output = model(x) - output2 = model(x2) - - self.assertEqual(list(output.shape), [1, 640]) - self.assertEqual(list(output2.shape), [1, 640]) + with self.assertWarns(UserWarning): + _ = model._transform_input(x) def test_clip_resnet50x4_image_forward_cuda(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping pretrained CLIP ResNet 50x4 Image forward CUDA test due to" + " insufficient Torch version." @@ -124,7 +120,7 @@ def test_clip_resnet50x4_image_forward_cuda(self) -> None: "Skipping pretrained CLIP ResNet 50x4 Image forward CUDA test due to" + " not supporting CUDA." ) - x = torch.zeros(1, 3, 224, 224).cuda() + x = torch.zeros(1, 3, 288, 288).cuda() model = clip_resnet50x4_image(pretrained=True).cuda() output = model(x) @@ -132,15 +128,29 @@ def test_clip_resnet50x4_image_forward_cuda(self) -> None: self.assertEqual(list(output.shape), [1, 640]) def test_clip_resnet50x4_image_jit_module_no_redirected_relu(self) -> None: - if torch.__version__ <= "1.8.0": + if version.parse(torch.__version__) <= version.parse("1.8.0"): raise unittest.SkipTest( "Skipping pretrained CLIP ResNet 50x4 Image load & JIT module with" + " no redirected relu test due to insufficient Torch version." 
) - x = torch.zeros(1, 3, 224, 224) + x = torch.zeros(1, 3, 288, 288) model = clip_resnet50x4_image( pretrained=True, replace_relus_with_redirectedrelu=False ) jit_model = torch.jit.script(model) output = jit_model(x) self.assertEqual(list(output.shape), [1, 640]) + + def test_clip_resnet50x4_image_jit_module_with_redirected_relu(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.8.0"): + raise unittest.SkipTest( + "Skipping pretrained CLIP ResNet 50x4 Image load & JIT module with" + + " redirected relu test due to insufficient Torch version." + ) + x = torch.zeros(1, 3, 288, 288) + model = clip_resnet50x4_image( + pretrained=True, replace_relus_with_redirectedrelu=True + ) + jit_model = torch.jit.script(model) + output = jit_model(x) + self.assertEqual(list(output.shape), [1, 640]) diff --git a/tests/optim/models/test_clip_resnet50x4_text.py b/tests/optim/models/test_clip_resnet50x4_text.py index 69352ca277..3d7f9d7cd5 100644 --- a/tests/optim/models/test_clip_resnet50x4_text.py +++ b/tests/optim/models/test_clip_resnet50x4_text.py @@ -2,37 +2,37 @@ import unittest import torch - from captum.optim.models import clip_resnet50x4_text +from packaging import version from tests.helpers.basic import BaseTest, assertTensorAlmostEqual class TestCLIPResNet50x4Text(BaseTest): def test_clip_resnet50x4_text_logit_scale(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping basic pretrained CLIP ResNet 50x4 Text logit scale test due" + " to insufficient Torch version." 
) model = clip_resnet50x4_text(pretrained=True) - expected_logit_scale = torch.tensor([4.605170249938965]) + expected_logit_scale = torch.tensor(4.605170249938965) assertTensorAlmostEqual(self, model.logit_scale, expected_logit_scale) def test_clip_resnet50x4_text_load_and_forward(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping basic pretrained CLIP ResNet 50x4 Text forward test due to" + " insufficient Torch version." ) # Start & End tokens: 49405, 49406 x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]) - x = x.int()[None, :] + x = x[None, :].long() model = clip_resnet50x4_text(pretrained=True) output = model(x) self.assertEqual(list(output.shape), [1, 640]) def test_clip_resnet50x4_text_forward_cuda(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping pretrained CLIP ResNet 50x4 Text forward CUDA test due to" + " insufficient Torch version." @@ -43,7 +43,7 @@ def test_clip_resnet50x4_text_forward_cuda(self) -> None: + " not supporting CUDA." ) x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]).cuda() - x = x.int()[None, :] + x = x[None, :].long() model = clip_resnet50x4_text(pretrained=True).cuda() output = model(x) @@ -51,13 +51,13 @@ def test_clip_resnet50x4_text_forward_cuda(self) -> None: self.assertEqual(list(output.shape), [1, 640]) def test_clip_resnet50x4_text_jit_module(self) -> None: - if torch.__version__ <= "1.8.0": + if version.parse(torch.__version__) <= version.parse("1.8.0"): raise unittest.SkipTest( "Skipping pretrained CLIP ResNet 50x4 Text load & JIT module" + " test due to insufficient Torch version." 
) x = torch.cat([torch.tensor([49405, 49406]), torch.zeros(77 - 2)]) - x = x.int()[None, :] + x = x[None, :].long() model = clip_resnet50x4_text(pretrained=True) jit_model = torch.jit.script(model) output = jit_model(x) From f3d3e1d8088c85a79f7da374e987b649a558fba3 Mon Sep 17 00:00:00 2001 From: John Reese Date: Sun, 15 May 2022 12:01:37 -0700 Subject: [PATCH 003/174] deployment of pyfmt with usort 1.0 Summary: This deploys pyfmt with usort 1.0 and the new import merging behavior. Facebook This is part of the final rollout, announced here: https://fb.workplace.com/groups/pyfmt/posts/1011066416197541/ Preemptive SEV: S271899 Hand rolled on devserver and laptops, with binaries hosted on manifold bucket `pyfi_wheels`. Couldn't use MSDK bump due to issue with make_par on sandcastle Macs: https://fb.workplace.com/groups/fbpython/posts/7503431436364825/ pokemon_lift Reviewed By: zertosh Differential Revision: D36394396 fbshipit-source-id: 7cee2a05261e3281fe86360cdb2faa62df1d9a4e --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 48bc6f4057..e03ab29915 100755 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ def report(*args): "sphinx-autodoc-typehints", "sphinxcontrib-katex", "mypy>=0.760", - "usort==0.6.4", + "usort==1.0.2", "ufmt", "scikit-learn", "annoy", From 33d2b75ffad8413beec8b29836b5873dcf487965 Mon Sep 17 00:00:00 2001 From: John Reese Date: Sun, 15 May 2022 12:53:03 -0700 Subject: [PATCH 004/174] apply import merging for fbcode (8 of 11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Applies new import merging and sorting from µsort v1.0. When merging imports, µsort will make a best-effort to move associated comments to match merged elements, but there are known limitations due to the dynamic nature of Python and developer tooling. These changes should not produce any dangerous runtime changes, but may require touch-ups to satisfy linters and other tooling.
Note that µsort uses case-insensitive, lexicographical sorting, which results in a different ordering compared to isort. This provides a more consistent sorting order, matching the case-insensitive order used when sorting import statements by module name, and ensures that "frog", "FROG", and "Frog" always sort next to each other. For details on µsort's sorting and merging semantics, see the user guide: https://usort.readthedocs.io/en/stable/guide.html#sorting Reviewed By: lisroach Differential Revision: D36402214 fbshipit-source-id: b641bfa9d46242188524d4ae2c44998922a62b4c --- captum/influence/_core/tracincp.py | 4 ++-- captum/influence/_core/tracincp_fast_rand_proj.py | 14 +++++++------- captum/influence/_utils/common.py | 2 +- tests/influence/_core/test_tracin_show_progress.py | 8 +++----- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index d3671767ce..d5acc2dfef 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -9,11 +9,11 @@ Callable, Iterator, List, + NamedTuple, Optional, - Union, Tuple, - NamedTuple, Type, + Union, ) import torch diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index 66007d9e50..cfbf7b47d4 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -1,26 +1,26 @@ #!/usr/bin/env python3 import warnings -from typing import Any, Callable, Iterator, List, Optional, Union, Tuple +from typing import Any, Callable, Iterator, List, Optional, Tuple, Union import torch -from captum._utils.common import _get_module_from_name, _format_inputs +from captum._utils.common import _format_inputs, _get_module_from_name from captum._utils.progress import progress from captum.influence._core.tracincp import ( - TracInCPBase, - KMostInfluentialResults, _influence_route_to_helpers, + KMostInfluentialResults, 
+ TracInCPBase, ) from captum.influence._utils.common import ( + _DatasetFromList, + _get_k_most_influential_helper, _jacobian_loss_wrt_inputs, _load_flexible_state_dict, _tensor_batch_dot, - _get_k_most_influential_helper, - _DatasetFromList, ) from captum.influence._utils.nearest_neighbors import ( - NearestNeighbors, AnnoyNearestNeighbors, + NearestNeighbors, ) from captum.log import log_usage from torch import Tensor diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index 28c76ebbc3..10783eaf4c 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from typing import Callable, Optional, Tuple, Union, Any, List +from typing import Any, Callable, List, Optional, Tuple, Union import torch import torch.nn as nn diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py index b4af4d3118..5b35352880 100644 --- a/tests/influence/_core/test_tracin_show_progress.py +++ b/tests/influence/_core/test_tracin_show_progress.py @@ -6,15 +6,13 @@ import torch.nn as nn from captum.influence._core.tracincp import TracInCP -from captum.influence._core.tracincp_fast_rand_proj import ( - TracInCPFast, -) +from captum.influence._core.tracincp_fast_rand_proj import TracInCPFast from parameterized import parameterized from tests.helpers.basic import BaseTest from tests.influence._utils.common import ( - get_random_model_and_data, - DataInfluenceConstructor, build_test_name_func, + DataInfluenceConstructor, + get_random_model_and_data, ) From d27e6c2fbe8df0e0ec6061ec4b4e0884efc70ffa Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 10:16:18 -0600 Subject: [PATCH 005/174] Add CLIP loss objectives --- captum/optim/_core/loss.py | 216 ++++++++++++++++++++++++- captum/optim/_utils/image/common.py | 53 +++++- tests/optim/core/test_loss.py | 159 ++++++++++++++++++ tests/optim/utils/image/test_common.py | 34 ++++ 4 
files changed, 460 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 66bb4c40c2..1dca3c50ad 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -5,7 +5,11 @@ import torch import torch.nn as nn -from captum.optim._utils.image.common import _dot_cossim, get_neuron_pos +from captum.optim._utils.image.common import ( + _create_new_vector, + _dot_cossim, + get_neuron_pos, +) from captum.optim._utils.typing import ModuleOutputMapping @@ -837,6 +841,216 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations +@loss_wrapper +class L2Mean(BaseLoss): + """ + Simple L2Loss penalty where the mean is used instead of the square root of the + sum. + + Used for CLIP models in https://distill.pub/2021/multimodal-neurons/ as per the + supplementary code: + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + """ + + def __init__( + self, + target: torch.nn.Module, + channel_index: Optional[int] = None, + constant: float = 0.5, + batch_index: Optional[int] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance. + channel_index (int, optional): Optionally only target a specific channel. + If set to None, all channels will be used. + Default: None + constant (float, optional): Constant value to deduct from the activations. + Default: 0.5 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch.
+ Default: None + """ + BaseLoss.__init__(self, target, batch_index) + self.constant = constant + self.channel_index = channel_index + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations = targets_to_values[self.target][ + self.batch_index[0] : self.batch_index[1] + ] + if self.channel_index is not None: + activations = activations[:, self.channel_index : self.channel_index + 1] + return ((activations - self.constant) ** 2).mean() + + +@loss_wrapper +class VectorLoss(BaseLoss): + """ + This objective is useful for optimizing towards channel directions. This can + be helpful for visualizing models like OpenAI's CLIP. + + This loss objective is similar to the Direction objective, except it computes the + matrix product of the activations and vector, rather than the cosine similarity. + In addition to optimizing towards channel directions, this objective can also + perform a similar role to the ChannelActivation objective by using one-hot 1D + vectors. + + See here for more details: + https://distill.pub/2021/multimodal-neurons/ + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + """ + + def __init__( + self, + target: torch.nn.Module, + vec: torch.Tensor, + activation_fn: Optional[Callable] = torch.nn.functional.relu, + move_channel_dim_to_final_dim: bool = True, + batch_index: Optional[int] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer instance. + vec (torch.Tensor): A direction vector to use, with a compatible shape for + computing the matrix product of the activations. + See torch.matmul for more details on compatible shapes: + https://pytorch.org/docs/stable/generated/torch.matmul.html + By default, vec is expected to share the same size as the channel + dimension of the activations. + activation_fn (Callable, optional): An optional activation function to + apply to the activations before computing the matrix product.
If set + to None, then no activation function will be used. + Default: torch.nn.functional.relu + move_channel_dim_to_final_dim (bool, optional): Whether or not to move the + channel dimension to the last dimension before computing the matrix + product. + Default: True + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) + self.vec = vec + self.activation_fn = activation_fn + self.move_channel_dim_to_final_dim = move_channel_dim_to_final_dim + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] + return _create_new_vector( + activations, + vec=self.vec, + activation_fn=self.activation_fn, + move_channel_dim_to_final_dim=self.move_channel_dim_to_final_dim, + ).mean() + + +@loss_wrapper +class FacetLoss(BaseLoss): + """ + The Facet loss objective used for Faceted Feature Visualization as described in: + https://distill.pub/2021/multimodal-neurons/#faceted-feature-visualization + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + + The FacetLoss objective allows us to steer feature visualization towards a + particular theme / concept. This is done by using the weights from linear probes + trained on the lower layers of a model to discriminate between a certain theme or + concept and generic natural images. + """ + + def __init__( + self, + vec: torch.Tensor, + ultimate_target: torch.nn.Module, + layer_target: Union[torch.nn.Module, List[torch.nn.Module]], + facet_weights: torch.Tensor, + strength: Optional[Union[float, List[float]]] = None, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: + + vec (torch.Tensor): A 1D channel vector. 
+ ultimate_target (nn.Module): The main target layer that we are + visualizing targets from. This is normally the penultimate layer of + the model. + layer_target (nn.Module): A layer that we have facet_weights for. This + target layer should be below the ultimate_target layer in the model. + strength (float, list of float, optional): A list of floats to use for batch + dimension weighting. Default is set to None for no weighting. + Default: None + facet_weights (torch.Tensor): Weighting that steers the objective + towards a particular theme or concept. These weight values should + come from linear probes trained on the layer_target layer. + batch_index (int, optional): The index of the activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ + BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) + self.ultimate_target = ultimate_target + self.layer_target = layer_target + self.vec = vec + self.strength = strength + assert facet_weights.dim() == 4 or facet_weights.dim() == 2 + self.facet_weights = facet_weights + + def _get_strength(self, batch: int, device: torch.device) -> torch.Tensor: + """ + Calculate batch weighting. + + Args: + + batch (int): The size of the batch dimension to use. + device (torch.device): The device to use. + + Returns: + strength_t (torch.Tensor): A tensor containing the weights to multiply the + different batch dimensions by.
+ """ + if isinstance(self.strength, (tuple, list)): + strength_t = torch.linspace( + self.strength[0], + self.strength[1], + steps=batch, + device=device, + ) + else: + strength_t = torch.ones([1], device=device) * self.strength + return strength_t[:, None, None, None] + + def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + activations_ultimate = targets_to_values[self.ultimate_target] + activations_ultimate = activations_ultimate + new_vec = _create_new_vector(activations_ultimate, self.vec)[ + self.batch_index[0] : self.batch_index[1] + ] + target_activations = targets_to_values[self.layer_target] + + layer_grad = torch.autograd.grad( + outputs=new_vec, + inputs=target_activations, + grad_outputs=torch.ones_like(new_vec), + retain_graph=True, + )[0] + layer = target_activations[self.batch_index[0] : self.batch_index[1]] + + flat_attr = layer * torch.nn.functional.relu(layer_grad.detach()) + if self.facet_weights.dim() == 2 and flat_attr.dim() == 4: + flat_attr = torch.sum(flat_attr, dim=(2, 3)) + + if self.strength: + strength_t = self._get_strength(new_vec.shape[0], flat_attr.device) + flat_attr = strength_t * flat_attr + return torch.sum(flat_attr * self.facet_weights) + + def sum_loss_list( loss_list: List, to_scalar_fn: Callable[[torch.Tensor], torch.Tensor] = torch.mean, diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index f1cdc5f477..31af3169ef 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -1,5 +1,5 @@ import math -from typing import List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union import matplotlib.pyplot as plt import numpy as np @@ -363,3 +363,54 @@ def hex2base10(x: str) -> float: * ((1 - (-x - 0.5) * 2) * color_list[1] + (-x - 0.5) * 2 * color_list[0]) ).permute(2, 0, 1) return color_tensor + + +def _create_new_vector( + x: torch.Tensor, + vec: torch.Tensor, + activation_fn: Optional[ + 
Callable[[torch.Tensor], torch.Tensor] + ] = torch.nn.functional.relu, + move_channel_dim_to_final_dim: bool = True, +) -> torch.Tensor: + """ + Create a vector using a given set of activations and another vector. + This function is intended for use in CLIP related loss objectives. + + https://distill.pub/2021/multimodal-neurons/ + https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py + The einsum equation: "ijkl,j->ikl", used by the paper's associated code is the + same thing as: "[..., C] @ vec", where vec has a shape of 'C'. + + Args: + + x (torch.Tensor): A set of 2d or 4d activations. + vec (torch.Tensor): A direction vector to use, with a compatible shape for + computing the matrix product of the activations. + See torch.matmul for more details on compatible shapes: + https://pytorch.org/docs/stable/generated/torch.matmul.html + By default, vec is expected to share the same size as the channel or + feature dimension of the activations. + activation_fn (Callable, optional): An optional activation function to + apply to the activations before computing the matrix product. If set + to None, then no activation function will be used. + Default: torch.nn.functional.relu + move_channel_dim_to_final_dim (bool, optional): Whether or not to move the + channel dimension to the last dimension before computing the matrix + product. + Default: True + + Returns + x (torch.Tensor): A vector created from the input activations and the + stored vector.
+ """ + assert x.device == vec.device + assert x.dim() > 1 + if activation_fn: + x = activation_fn(x) + if x.dim() > 2 and move_channel_dim_to_final_dim: + permute_vals = [0] + list(range(x.dim()))[2:] + [1] + x = x.permute(*permute_vals) + return torch.mean(x @ vec, [1, 2]) + else: + return (x @ vec)[:, None] diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 49c35ed9d4..4b516e4fa0 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -197,6 +197,165 @@ def test_activation_weights_1(self) -> None: ) +class TestL2Mean(BaseTest): + def test_l2mean_init(self) -> None: + model = torch.nn.Identity() + loss = opt_loss.L2Mean(model) + self.assertEqual(loss.constant, 0.5) + self.assertIsNone(loss.channel_index) + + def test_l2mean_constant(self) -> None: + model = BasicModel_ConvNet_Optim() + constant = 0.5 + loss = opt_loss.L2Mean(model.layer, constant=constant) + output = get_loss_value(model, loss) + + expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 + self.assertAlmostEqual(output, expected, places=6) + + def test_l2mean_channel_index(self) -> None: + model = BasicModel_ConvNet_Optim() + constant = 0.0 + loss = opt_loss.L2Mean(model.layer, channel_index=0, constant=constant) + output = get_loss_value(model, loss) + + expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 + self.assertAlmostEqual(output, expected, places=6) + + +class TestVectorLoss(BaseTest): + def test_vectorloss_init(self) -> None: + model = torch.nn.Identity() + vec = torch.tensor([0, 1]).float() + loss = opt_loss.VectorLoss(model, vec=vec) + assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + self.assertTrue(loss.move_channel_dim_to_final_dim) + self.assertEqual(loss.activation_fn, torch.nn.functional.relu) + + def test_vectorloss_single_channel(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.tensor([0, 1]).float() + loss = opt_loss.VectorLoss(model.layer, vec=vec) + output = get_loss_value(model, loss, 
input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS, places=6) + + def test_vectorloss_multiple_channels(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.tensor([1, 1]).float() + loss = opt_loss.VectorLoss(model.layer, vec=vec) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS * 2, places=6) + + +class TestFacetLoss(BaseTest): + def test_facetloss_init(self) -> None: + model = torch.nn.Sequential(torch.nn.Identity(), torch.nn.Identity()) + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0], + vec=vec, + facet_weights=facet_weights, + ) + assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + assertTensorAlmostEqual(self, loss.facet_weights, facet_weights, delta=0.0) + + def test_facetloss_single_channel(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 + self.assertAlmostEqual(output, expected / 10.0, places=6) + + def test_facetloss_multi_channel(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([1, 1, 1]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 2.0 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output 
= get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 1.560000, places=6) + + def test_facetloss_strength(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + strength = 0.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + strength=strength, + ) + self.assertEqual(loss.strength, strength) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 0.1950000, places=6) + + def test_facetloss_strength_batch(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + strength = [0.1, 5.05] + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + strength=strength, + ) + self.assertEqual(loss.strength, strength) + output = get_loss_value(model, loss, input_shape=[4, 3, 6, 6]) + self.assertAlmostEqual(output, 4.017000198364258, places=6) + + def test_facetloss_2d_weights(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) + layer.bias.data.fill_(1) + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 + 
self.assertAlmostEqual(output, expected / 10.0, places=6) + + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: model = BasicModel_ConvNet_Optim() diff --git a/tests/optim/utils/image/test_common.py b/tests/optim/utils/image/test_common.py index ef484c7135..fcece26683 100644 --- a/tests/optim/utils/image/test_common.py +++ b/tests/optim/utils/image/test_common.py @@ -516,3 +516,37 @@ def test_make_grid_image_single_tensor_pad_value_jit_module(self) -> None: ) self.assertEqual(list(expected_output.shape), [1, 1, 7, 7]) assertTensorAlmostEqual(self, test_output, expected_output, 0) + + +class TestCreateNewVector(BaseTest): + def test_create_new_vector_one_hot(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.item(), 37.0) + + def test_create_new_vector_one_hot_batch(self) -> None: + x = torch.arange(0, 4 * 3 * 5 * 5).view(4, 3, 5, 5).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.tolist(), [37.0, 112.0, 187.0, 262.0]) + + def test_create_new_vector(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + vec = torch.tensor([1, 1, 1]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(out.item(), 111.0) + + def test_create_new_vector_activation_fn(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + x = x - x.mean() + vec = torch.tensor([1, 0, 1]).float() + out = common._create_new_vector(x, vec, activation_fn=torch.nn.functional.relu) + self.assertEqual(out.item(), 25.0) + + def test_create_new_vector_no_activation_fn(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + x = x - x.mean() + vec = torch.tensor([1, 1, 1]).float() + out = common._create_new_vector(x, vec, activation_fn=None) + self.assertEqual(out.item(), 0.0) From 
77850c7ed2bb6b6065578a2d3fa38aadbfee4d90 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 11:06:44 -0600 Subject: [PATCH 006/174] Fix Mypy error --- tests/optim/core/test_loss.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 4b516e4fa0..d2cf248bdc 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -263,8 +263,8 @@ def test_facetloss_init(self) -> None: def test_facetloss_single_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -281,8 +281,8 @@ def test_facetloss_single_channel(self) -> None: def test_facetloss_multi_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) @@ -299,8 +299,8 @@ def test_facetloss_multi_channel(self) -> None: def test_facetloss_strength(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -319,8 +319,8 @@ def test_facetloss_strength(self) -> None: def test_facetloss_strength_batch(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -339,8 +339,8 @@ def test_facetloss_strength_batch(self) -> None: def test_facetloss_2d_weights(self) -> None: layer = 
torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.data.fill_(0.1) - layer.bias.data.fill_(1) + layer.weight.fill_(0.1) + layer.bias.fill_(1) model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() From a4eee848254611125954cddbf057d063fc16c5c1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 11:46:17 -0600 Subject: [PATCH 007/174] Fix Mypy errors --- tests/optim/core/test_loss.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index d2cf248bdc..39d8ef4ee1 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -263,8 +263,8 @@ def test_facetloss_init(self) -> None: def test_facetloss_single_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -281,8 +281,8 @@ def test_facetloss_single_channel(self) -> None: def test_facetloss_multi_channel(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) @@ -299,8 +299,8 @@ def test_facetloss_multi_channel(self) -> None: def test_facetloss_strength(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -319,8 +319,8 @@ def test_facetloss_strength(self) -> None: def test_facetloss_strength_batch(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, 
bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() @@ -339,8 +339,8 @@ def test_facetloss_strength_batch(self) -> None: def test_facetloss_2d_weights(self) -> None: layer = torch.nn.Conv2d(2, 3, 1, bias=True) - layer.weight.fill_(0.1) - layer.bias.fill_(1) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() From 452979baabd0b9aa709bb199746058084e48fc32 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 18:04:07 -0600 Subject: [PATCH 008/174] Add Optimization With Transparency tutorial --- ...ptimizationWithTransparency_OptimViz.ipynb | 4425 +++++++++++++++++ 1 file changed, 4425 insertions(+) create mode 100644 tutorials/optimviz/OptimizationWithTransparency_OptimViz.ipynb diff --git a/tutorials/optimviz/OptimizationWithTransparency_OptimViz.ipynb b/tutorials/optimviz/OptimizationWithTransparency_OptimViz.ipynb new file mode 100644 index 0000000000..5c73dd2ed7 --- /dev/null +++ b/tutorials/optimviz/OptimizationWithTransparency_OptimViz.ipynb @@ -0,0 +1,4425 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "OptimizationWithTransparency_OptimViz.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "370a9f4d87814515a51144d26a9ca8b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_fbec190edc884c0aa2342d4c278bc7c6", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_11f67942024d4e3098a9e7d88b0b144d", + "IPY_MODEL_58498c78f5a046a8853c954d6bcb264f", + "IPY_MODEL_2db7e08b9242423c85928c537e7f300d" + ] + } + }, + "fbec190edc884c0aa2342d4c278bc7c6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "11f67942024d4e3098a9e7d88b0b144d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_05f2bd3ad5f14f698bef478c33eeb2b1", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_7efa32283f78475c994a3c20011f017d" + } + }, + "58498c78f5a046a8853c954d6bcb264f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_97e90f93bdff4cdb84ed7616f9b2fa08", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 512, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 512, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_73adf96fa6c84b608c2a6927a5347414" + } + }, + "2db7e08b9242423c85928c537e7f300d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_4afd2911641f44278eeb8dae71721be8", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 512/512 [00:25<00:00, 20.20 step/s, Objective=-940.6]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_a6fa5361b97d4790a7ed78c928612fd6" + } + }, + "05f2bd3ad5f14f698bef478c33eeb2b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + 
"_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "7efa32283f78475c994a3c20011f017d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "97e90f93bdff4cdb84ed7616f9b2fa08": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "73adf96fa6c84b608c2a6927a5347414": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4afd2911641f44278eeb8dae71721be8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "a6fa5361b97d4790a7ed78c928612fd6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + 
"_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "a98966a99b5b41bc8559e8046b96969f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_3f4c72541ad84ff0b05071d020cd2f0a", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_5de4bf65e4cf4492aa2f35bb7bcd5167", + "IPY_MODEL_c0fcfadc6d1e4596b9b3a88f1e6d0a0f", + "IPY_MODEL_31fe21e26c214532aeb4844f009e92f0" + ] + } + }, + "3f4c72541ad84ff0b05071d020cd2f0a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": 
null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "5de4bf65e4cf4492aa2f35bb7bcd5167": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_cf4af50e246443a8832eb622bd2b0ddb", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_c615948ca593466fb2602d98da4fb5ef" + } + }, + "c0fcfadc6d1e4596b9b3a88f1e6d0a0f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_911b842b1d374479b06b272674dee5d1", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 256, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 256, + "_view_count": null, + "_view_module_version": "1.5.0", + 
"orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_8520b5deb27740d997b4a4a05fe6e493" + } + }, + "31fe21e26c214532aeb4844f009e92f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_cdd0fd17c90c4a6a9c51036ddf9cde78", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "​", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 256/256 [00:09<00:00, 26.77 step/s, Objective=-2799.0]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_9f56ee00c73141fb8294ee315a94f718" + } + }, + "cf4af50e246443a8832eb622bd2b0ddb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "c615948ca593466fb2602d98da4fb5ef": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + 
"grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "911b842b1d374479b06b272674dee5d1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "8520b5deb27740d997b4a4a05fe6e493": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": 
null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "cdd0fd17c90c4a6a9c51036ddf9cde78": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "9f56ee00c73141fb8294ee315a94f718": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": 
null, + "display": null, + "left": null + } + }, + "f7c74f1afcc044d089932873da46fb0c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_550cd2bd52134286b76bccbeef7abcb1", + "IPY_MODEL_8cb148d2cac34c0dacac2470bf1e9425", + "IPY_MODEL_c86de569236942e49689347e283dca4c" + ], + "layout": "IPY_MODEL_c57371c34d724c24beb4349ed2d537c7" + } + }, + "550cd2bd52134286b76bccbeef7abcb1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_969e4090581846f69cd6bf3bf8ad89a4", + "placeholder": "​", + "style": "IPY_MODEL_4bffb6e24fd04f9bb81df1458ef1591c", + "value": "100%" + } + }, + "8cb148d2cac34c0dacac2470bf1e9425": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0de0fbbd2d194cd386a0bd2b018828cb", + "max": 512, + "min": 0, + 
"orientation": "horizontal", + "style": "IPY_MODEL_767f518665f34f4ebf5f9498ff2c9f19", + "value": 512 + } + }, + "c86de569236942e49689347e283dca4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_46e8522957ac45129b3aee66cdc47f08", + "placeholder": "​", + "style": "IPY_MODEL_f06233ce85924fcb8bba14228f4325ef", + "value": " 512/512 [00:12<00:00, 41.50 step/s, Objective=-292.1]" + } + }, + "c57371c34d724c24beb4349ed2d537c7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": 
null, + "visibility": null, + "width": null + } + }, + "969e4090581846f69cd6bf3bf8ad89a4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4bffb6e24fd04f9bb81df1458ef1591c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0de0fbbd2d194cd386a0bd2b018828cb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "767f518665f34f4ebf5f9498ff2c9f19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "46e8522957ac45129b3aee66cdc47f08": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f06233ce85924fcb8bba14228f4325ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b9b1828c563c4cd184f26fa5590b3f5d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_03a3658f7c2e499f9528d3376ac6b203", + "IPY_MODEL_6717308b8d6148d9a9c8747164b791b6", + "IPY_MODEL_53a11c21782140afa93165abf2f97e76" + ], + "layout": "IPY_MODEL_b91e276e9fb24ebb804eb5605707874b" + } + }, + "03a3658f7c2e499f9528d3376ac6b203": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6dd3c9c30bb246cdbb364456cd1bf5e8", + "placeholder": "​", + "style": "IPY_MODEL_5017968b4ae742d5b8320942b325e707", + "value": "100%" + } + }, + "6717308b8d6148d9a9c8747164b791b6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_92994846e32f4fd4a079444319362f1a", + "max": 512, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_35d3a18dfd08421ba1543031b5fb8cab", + "value": 512 + } + }, + "53a11c21782140afa93165abf2f97e76": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3952b6f664e94cf8ad7edaf249a17d1b", + "placeholder": "​", + "style": "IPY_MODEL_b6e7d16af29a4e43ac54a249e843d973", + "value": " 512/512 [00:12<00:00, 39.40 step/s, Objective=-786.4]" + } + }, + 
"b91e276e9fb24ebb804eb5605707874b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6dd3c9c30bb246cdbb364456cd1bf5e8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5017968b4ae742d5b8320942b325e707": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "92994846e32f4fd4a079444319362f1a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "35d3a18dfd08421ba1543031b5fb8cab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3952b6f664e94cf8ad7edaf249a17d1b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b6e7d16af29a4e43ac54a249e843d973": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cee03ddb22f84eefa613c6446234c6c4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2bb9a8610f0e4d8b91d054cfe9140801", + "IPY_MODEL_f825760c27ee4b80830654f3c02ae65b", + "IPY_MODEL_fafbc35e64814fa4b13e5da2f643dddd" + ], + "layout": "IPY_MODEL_5b9280650f144ff882e0d329ff4cb5bc" + } + }, + "2bb9a8610f0e4d8b91d054cfe9140801": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_084a58aa0af344a2b2a3fcafa838811c", + "placeholder": "​", + "style": "IPY_MODEL_f1f53143baa94a89817ff46acece5054", + "value": "100%" + } + }, + "f825760c27ee4b80830654f3c02ae65b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ddc620d6a2c042789bda344dc94b5017", + "max": 256, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1ed5c534ec334eec8d144f912e6beb23", + "value": 256 + } + }, + "fafbc35e64814fa4b13e5da2f643dddd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_84afeb12ab79493a8aa8e3040323216d", + "placeholder": "​", + "style": "IPY_MODEL_0dbfdbf943244faea948bfafc16c4a2f", + "value": " 256/256 [00:06<00:00, 41.01 step/s, Objective=-2563.6]" + } + }, + "5b9280650f144ff882e0d329ff4cb5bc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "084a58aa0af344a2b2a3fcafa838811c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f1f53143baa94a89817ff46acece5054": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ddc620d6a2c042789bda344dc94b5017": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1ed5c534ec334eec8d144f912e6beb23": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "84afeb12ab79493a8aa8e3040323216d": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0dbfdbf943244faea948bfafc16c4a2f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "97f8059a1a0f45f795ed677e3b7a653a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0f98ad01cf3d473eadffe691475b39fb", + "IPY_MODEL_f71ac5cdf889431297f604518614ade8", + "IPY_MODEL_022f04c4b4754a90a4910a02d3386106" + ], + "layout": "IPY_MODEL_96c9ebb9cfc047198f97db04c7be8b66" + } + }, + "0f98ad01cf3d473eadffe691475b39fb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cb9c56fb447945a3b9c20f52b8bc6748", + "placeholder": "​", + "style": "IPY_MODEL_f90c6c3c80cc49a8846396e11d739b96", + "value": "100%" + } + }, + "f71ac5cdf889431297f604518614ade8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f540a3f6f1dc4f169746510dca7b3691", + "max": 512, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_55af60abb1ca4831bfdaa12185303e79", + "value": 512 + } + }, + "022f04c4b4754a90a4910a02d3386106": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40cc1ffd1c734f9e800e8a40a234512e", + "placeholder": "​", + "style": "IPY_MODEL_946a6ac6a24d49e39f3248f9936ef592", + "value": " 512/512 [00:13<00:00, 41.13 step/s, Objective=-1352.2]" + } + }, + "96c9ebb9cfc047198f97db04c7be8b66": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb9c56fb447945a3b9c20f52b8bc6748": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f90c6c3c80cc49a8846396e11d739b96": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f540a3f6f1dc4f169746510dca7b3691": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "55af60abb1ca4831bfdaa12185303e79": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "40cc1ffd1c734f9e800e8a40a234512e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "946a6ac6a24d49e39f3248f9936ef592": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "95d38ecf0e3f42d285b3b72179601f70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_323d89c37c62400ca33f194b44ae74d0", + "IPY_MODEL_baf6d0f46126420395bd64ec76a704d6", + "IPY_MODEL_0c99c38f17544da997a575538dd2e5f0" + ], + "layout": "IPY_MODEL_4d3ba63fda70437a9bc0770e6214f1c6" + } + }, + "323d89c37c62400ca33f194b44ae74d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_99f161d1f27144ec8721c8dd6e841da6", + "placeholder": "​", + "style": "IPY_MODEL_6742449d54ea4997b5b85082b7d12efd", + "value": "100%" + } + }, + "baf6d0f46126420395bd64ec76a704d6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9ad0d9e48e7a4a7ba7f66cec35a8eacd", + "max": 512, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d7c6b875af764e0a9aac393bb539acf3", + "value": 512 + } + }, + "0c99c38f17544da997a575538dd2e5f0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_27d1bfac70e64b04925375e57162aaae", + "placeholder": "​", + "style": "IPY_MODEL_cf4d1a9836814fab81ca7688a66d5fab", + "value": " 512/512 [00:12<00:00, 39.01 step/s, Objective=-1222.3]" + } + }, + "4d3ba63fda70437a9bc0770e6214f1c6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "99f161d1f27144ec8721c8dd6e841da6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6742449d54ea4997b5b85082b7d12efd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9ad0d9e48e7a4a7ba7f66cec35a8eacd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d7c6b875af764e0a9aac393bb539acf3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "27d1bfac70e64b04925375e57162aaae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf4d1a9836814fab81ca7688a66d5fab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "3f4b2348efa0443ab3c29300b85f29e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_fe953f251ac24f8b912db5cf4f9864e3", + "IPY_MODEL_5601082b45ce4996acd41e91921243c2", + "IPY_MODEL_82e4a1dbe4944e28bbab6ea2e8ad5661" + ], + "layout": "IPY_MODEL_3137aeea1e504d1f842dd8e65667bc70" + } + }, + "fe953f251ac24f8b912db5cf4f9864e3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e306b531228a441491fbdfccb9522fdc", + "placeholder": "​", + "style": "IPY_MODEL_0317501458264f4e822b3486207f8019", + "value": "100%" + } + }, + "5601082b45ce4996acd41e91921243c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aeff5916a0e140e3a254d2bf7e2fd60b", + "max": 512, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_6b3d9810d08b4ce190d7c3a801a345e8", + "value": 512 + } + }, + "82e4a1dbe4944e28bbab6ea2e8ad5661": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f06b61f3847b477487f4359bf855c4d1", + "placeholder": "​", + "style": "IPY_MODEL_55c305b5b8ed407f972fd2b775a5d18c", + "value": " 512/512 [00:12<00:00, 40.96 step/s, Objective=-2751.7]" + } + }, + "3137aeea1e504d1f842dd8e65667bc70": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": 
null + } + }, + "e306b531228a441491fbdfccb9522fdc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0317501458264f4e822b3486207f8019": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "aeff5916a0e140e3a254d2bf7e2fd60b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", 
+ "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6b3d9810d08b4ce190d7c3a801a345e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f06b61f3847b477487f4359bf855c4d1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "55c305b5b8ed407f972fd2b775a5d18c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Optimizing with Transparency" + ], + "metadata": { + "id": "dnzyC1T_A92P" + } + }, + { + "cell_type": "markdown", + "source": [ + "This tutorial notebook illustrates how to use Captum.optim to render RGBA images when using models trained only on RGB images. This process is known as optimizing with transparency, and more information on it can be found at [the corresponding research paper](https://distill.pub/2018/differentiable-parameterizations/#section-rgba). As we will see below, optimizing with transparency yields important information about the saliency of feature visualizations that regular feature visualizations miss." 
+ ], + "metadata": { + "id": "Vp2ArO9T9wZO" + } + }, + { + "cell_type": "code", + "source": [ + "from typing import Callable, Tuple, List, Optional, Sequence, Union, Dict\n", + "import math\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "import captum.optim as opt\n", + "from captum.optim.models import googlenet\n", + "\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model = googlenet(pretrained=True).to(device)" + ], + "metadata": { + "id": "Tz9CVl-TZ8Ha" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "In addition to a visualization function, we'll define four main helper functions for this tutorial. The first function allows us to create distinct checkerboard backgrounds that let us easily see transparency, and the second function allows for the compositing of RGBA images onto backgrounds. The third function allows us to quickly view RGBA images on multiple distinct backgrounds. The fourth function simply allows us to graph the loss values from our rendering." 
+ ], + "metadata": { + "id": "JsPKNvxKTehk" + } + }, + { + "cell_type": "code", + "source": [ + "ModuleOutputMapping = Dict[torch.nn.Module, Optional[torch.Tensor]]\n", + "\n", + "import matplotlib.pylab as plt\n", + "\n", + "\n", + "def visualize(\n", + " model: torch.nn.Module,\n", + " loss_fn: opt.loss.Loss,\n", + " image: opt.images.ImageParameterization,\n", + " transforms: Optional[Union[torch.nn.Module, List[torch.nn.Module]]] = None,\n", + " n_iter: int = 512,\n", + " lr: float = 0.01,\n", + " return_image_instance: bool = False,\n", + ") -> Tuple[\n", + " Union[opt.images.ImageParameterization, opt.images.ImageTensor], torch.Tensor\n", + "]:\n", + " \"\"\"\n", + " Helper function for rendering results.\n", + "\n", + " Args:\n", + " model (nn.Module): A PyTorch model instance.\n", + " loss_fn (callable): The loss function to minimize during\n", + " optimization.\n", + " image (ImageParameterization): An image parameterization to render.\n", + " transforms (nn.Module or list of nn.Module, optional): The transforms to use\n", + " for optimization. 
If set to None then TransformationRobustness() is used.\n", + " Default: None\n", + " n_iter (int, optional): Number of steps to run optimization for.\n", + " Default: 512\n", + " lr (float, optional): If no optimizer is given, then lr is used as the\n", + " learning rate for the Adam optimizer.\n", + " Default: 0.01\n", + " return_image_instance (bool, optional): Whether or not to return a detached\n", + " tensor or the ImageParameterization instance.\n", + " Default: False\n", + "\n", + " Returns:\n", + " image (torch.Tensor or NaturalImage instance): The results of the rendering.\n", + " history (torch.Tensor): The loss history for the rendering.\n", + " \"\"\"\n", + " assert image().dim() == 4\n", + " if transforms is None:\n", + " transforms = opt.transforms.TransformationRobustness()\n", + " transforms = (\n", + " torch.nn.Sequential(*transforms)\n", + " if isinstance(transforms, (list, tuple))\n", + " else transforms\n", + " )\n", + " obj = opt.InputOptimization(model, loss_fn, image, transforms)\n", + " history = obj.optimize(opt.optimization.n_steps(n_iter, True), lr=lr)\n", + " if return_image_instance:\n", + " return image, history\n", + " else:\n", + " return image().detach(), history\n", + "\n", + "\n", + "def create_checkerboard(\n", + " size: Tuple[int, int],\n", + " channels: int = 3,\n", + " tiles: int = 4,\n", + " colors: List[float] = [1.0, 0.0],\n", + ") -> torch.Tensor:\n", + " \"\"\"\n", + " Create a checkerboard pattern.\n", + "\n", + " Based on Lucid's checkerboard function from here: https://github.com/tensorflow/\n", + " lucid/blob/master/notebooks/differentiable-parameterizations/transparency.ipynb\n", + "\n", + " Args:\n", + "\n", + " size (Tuple[int, int]): The dimensions to use when creating the image, with a\n", + " shape of: [H, W].\n", + " channels (int, optional): The number of image channels to use for the output\n", + " image.\n", + " Default: 3\n", + " tiles (int, optional): The number of tiles to create inside the image.\n", + 
" Default: 4\n", + " colors (list of float, optional): A list of colors to use for the\n", + " checkerboard.\n", + " Default: [1.0, 0.0]\n", + "\n", + " Returns:\n", + " tensor (torch.Tensor): A CHW image with a checkerboard pattern.\n", + " \"\"\"\n", + " assert len(size) == 2 and len(colors) == 2\n", + "\n", + " square = torch.ones([math.ceil(float(d / tiles) / 2) for d in size])\n", + " board = torch.tensor([colors * tiles, colors[::-1] * tiles] * tiles)\n", + " scaled = torch.kron(board, square)[: size[0], : size[1]]\n", + " return torch.stack([scaled] * channels)\n", + "\n", + "\n", + "def composite_alpha(\n", + " x: torch.Tensor,\n", + " background: torch.Tensor,\n", + " gamma_to_linear: bool = False,\n", + " linear_to_gamma: bool = True,\n", + ") -> torch.Tensor:\n", + " \"\"\"\n", + " Composite an RGBA NCHW image tensor onto an NCHW image tensor background.\n", + "\n", + " See here for more details:\n", + " https://en.wikipedia.org/wiki/Alpha_compositing\n", + " https://en.wikipedia.org/wiki/Alpha_compositing#Gamma_correction\n", + "\n", + " Args:\n", + "\n", + " x (torch.Tensor): The RGBA image tensor with 4 channels in the format of NCHW.\n", + " background (torch.Tensor): The background NCHW image tensor to use.\n", + " gamma_to_linear (bool, optional): Whether or not to convert the RGB channels\n", + " of the input image from gamma to a linear format.\n", + " Default: False\n", + " linear_to_gamma (bool, optional): Whether or not to convert the output image\n", + " from linear to gamma format.\n", + " Default: True\n", + "\n", + " Returns:\n", + " image (torch.Tensor): The input image composited on top of the background.\n", + " \"\"\"\n", + " assert x.dim() == 4 and x.shape[1] == 4\n", + " assert background.dim() == 4\n", + " assert x.device == background.device\n", + " if gamma_to_linear:\n", + " x[:, :3, ...] 
= x[:, :3, ...].clone() ** 2.2\n", + " rgb, alpha_channel = x[:, :3, ...], x[:, 3:, ...]\n", + " image = background * (1.0 - alpha_channel) + rgb * alpha_channel\n", + " if linear_to_gamma:\n", + " image = image ** (1.0 / 2.2)\n", + " return image\n", + "\n", + "\n", + "def create_mosaic(\n", + " img: torch.Tensor,\n", + " background: Optional[torch.Tensor] = None,\n", + " num_tiles: int = 4,\n", + " gamma_to_linear: bool = False,\n", + " linear_to_gamma: bool = True,\n", + ") -> torch.Tensor:\n", + " \"\"\"\n", + " Composite an NCHW RGBA image tensor onto 4 distinct backgrounds:\n", + " no background, checkerboard, white, and black backgrounds.\n", + "\n", + " Args:\n", + "\n", + " img (torch.Tensor): An RGBA NCHW image tensor.\n", + " background (torch.Tensor, optional): An NCHW image tensor to use as a\n", + " background for the img input. If set to None, then a checkerboard\n", + " background will be used.\n", + " Default: None\n", + " num_tiles (int, optional): The number of tiles to use for the checkerboard\n", + " background image. 
This variable is only used if background is set to None.\n", + " Default: 4\n", + " gamma_to_linear (bool, optional): Whether or not to convert the RGB channels\n", + " of the input image from gamma to a linear format.\n", + " Default: False\n", + " linear_to_gamma (bool, optional): Whether or not to convert the output image\n", + " from linear to gamma format.\n", + " Default: True\n", + "\n", + " Returns:\n", + " mosaic_tensor (torch.Tensor): An NCHW image mosaic showing the img\n", + " input on different backgrounds.\n", + " \"\"\"\n", + " assert img.dim() == 4 and img.shape[1] == 4\n", + " img_list = [img[:, :3]]\n", + "\n", + " # Place visualizations on top of custom or checkerboard image\n", + " if background is None:\n", + " background = (\n", + " create_checkerboard(img.shape[2:], tiles=num_tiles)\n", + " .unsqueeze(0)\n", + " .to(img.device)\n", + " )\n", + "\n", + " img_list.append(\n", + " composite_alpha(\n", + " img,\n", + " background,\n", + " gamma_to_linear=gamma_to_linear,\n", + " linear_to_gamma=linear_to_gamma,\n", + " )\n", + " )\n", + "\n", + " # Place visualization on white background\n", + " img_list.append(\n", + " composite_alpha(\n", + " img,\n", + " torch.ones_like(img[:, :3]),\n", + " gamma_to_linear=gamma_to_linear,\n", + " linear_to_gamma=linear_to_gamma,\n", + " )\n", + " )\n", + "\n", + " # Place visualization on black background\n", + " img_list.append(\n", + " composite_alpha(\n", + " img,\n", + " torch.zeros_like(img[:, :3]),\n", + " gamma_to_linear=gamma_to_linear,\n", + " linear_to_gamma=linear_to_gamma,\n", + " )\n", + " )\n", + " return torch.cat(img_list)\n", + "\n", + "\n", + "def composite_alpha_only(x: torch.Tensor) -> torch.Tensor:\n", + " \"\"\"\n", + " Visualize the alpha channel of an NCHW RGBA image tensor.\n", + "\n", + " Args:\n", + "\n", + " x (torch.Tensor): An RGBA NCHW image tensor.\n", + "\n", + " Returns:\n", + " x (torch.Tensor): An RGB NCHW image tensor for the 4th input image channel.\n", + " \"\"\"\n", + 
" assert x.dim() == 4 and x.shape[1] == 4\n", + " return torch.ones_like(x[:, :3]) * x[:, 3:]\n", + "\n", + "\n", + "def plot_loss(\n", + " history: Union[torch.Tensor, List[torch.Tensor]],\n", + " figsize: Optional[Union[Tuple[int, int], Tuple[float, float]]] = None,\n", + " title: Optional[str] = None,\n", + " labels: Optional[List[str]] = None,\n", + " axis_names: Optional[List[str]] = [\"Step\", \"Loss\"],\n", + ") -> None:\n", + " \"\"\"\n", + " Helper function for graphing losses.\n", + "\n", + " Args:\n", + "\n", + " history (torch.Tensor or list of torch.Tensor): A set of loss values inside\n", + " the history created from the optimize function.\n", + " figsize (tuple of int or tuple of float, optional): The size of the graph.\n", + " Default: None\n", + " title (str, optional): The title of the graph.\n", + " Default: None\n", + " labels (list of str, optional): A list labels to use if graphing multiple\n", + " history tensors.\n", + " Default: None\n", + " axis_names (list of str): The names to use for the x and y axes, in a format\n", + " of: [x_axis, y_axis].\n", + " Default: [\"Step\", \"Loss\"]\n", + " \"\"\"\n", + " assert len(axis_names) == 2\n", + " if figsize is not None:\n", + " plt.figure(figsize=figsize)\n", + " if not torch.is_tensor(history):\n", + " history = [h.detach().cpu().tolist() for h in history]\n", + " for i, h in enumerate(history):\n", + " label = \"Test \" + str(i + 1) if labels is None else labels[i]\n", + " plt.plot(h, label=label)\n", + " plt.legend()\n", + " else:\n", + " history = history.detach().cpu().tolist()\n", + " plt.plot(history)\n", + " if title is not None:\n", + " plt.title(title)\n", + " if axis_names is not None:\n", + " plt.ylabel(axis_names[1])\n", + " plt.xlabel(axis_names[0])\n", + " plt.show()" + ], + "metadata": { + "id": "GNdef32udfDh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Alpha Compositing\n", + "\n", + "We can verify that our alpha 
compositing code works by displaying Captum's logo on a custom background. We also show how to load an RGBA image using `ImageTensor`'s `open` function." + ], + "metadata": { + "id": "hJ7H4h6x5O8c" + } + }, + { + "cell_type": "code", + "source": [ + "# Download RGBA & show test image\n", + "img_url = (\n", + " \"https://github.com/pytorch/captum/raw/master/website/static/img/captum_logo.png\"\n", + ")\n", + "captum_logo = opt.images.ImageTensor.open(img_url, mode=\"RGBA\")[None, :].to(device)\n", + "\n", + "print(\"The RGBA image:\")\n", + "opt.images.show(captum_logo, figsize=(6.5, 6.5))\n", + "\n", + "# Show Captum logo with alpha channel only\n", + "print(\n", + " \"\\nThe RGBA image's alpha channel (white represents opaque \\nregions, and black\"\n", + " + \" represents transparent regions):\"\n", + ")\n", + "opt.images.show(composite_alpha_only(captum_logo), figsize=(6.5, 6.5))\n", + "\n", + "# Setup a checkerboard background image with square tiles\n", + "background = create_checkerboard([max(captum_logo.shape[2:])] * 2, tiles=4).to(device)\n", + "background = background[None, :, : captum_logo.shape[2], : captum_logo.shape[3]]\n", + "\n", + "# Make black background tiles blue\n", + "blue_color = torch.tensor([0.0, 0.7071, 0.7071], device=device).view(1, 3, 1, 1)\n", + "background = torch.where(background == 0.0, blue_color, background)\n", + "\n", + "# Show background image\n", + "print(\"\\nOur custom background image:\")\n", + "opt.images.show(background, figsize=(6.5, 6.5))\n", + "\n", + "# Composite logo onto background\n", + "captum_logo_on_background = composite_alpha(\n", + " captum_logo, background, gamma_to_linear=True\n", + ")\n", + "print(\"\\nThe RGBA image on top of the background image:\")\n", + "opt.images.show(captum_logo_on_background, figsize=(6.5, 6.5))" + ], + "metadata": { + "id": "hn_zkqFQ5OZn", + "outputId": "68b4bc28-6e0e-4c1b-bc9d-ac31c899a481", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 592 + } + }, + 
"execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The RGBA image:\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "The RGBA image's alpha channel (white represents opaque \n", + "regions, and black represents transparent regions):\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Our custom background image:\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAABsCAYAAACPb8KhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAD2ElEQVR4nO3aMUtdZxzH8f+5xwQutEIhN5vdlCyWLBccQraSgEPpOyg45F0EHJq3UHDIq9BSKLSbLhIwjoYO4pLYRS4Rot77dGnHqx5I+nj+fD7rWX4envPl4WJTSgkAchrUHgDAlyPyAImJPEBiIg+QmMgDJCbyAIkt3PC8d/9fuXFwEK9PTmrP6OT5aBTb43G0TVN7yq1Nrq7iye5uvJ1Mak/pZHN5OV6urNSe0cmbs7N4urcXH6fT2lNurW2a2BmP49loVHtKJ1vHx/Hi8LD2jM7K+vrceLjJAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYiIPkJjIAyQm8gCJiTxAYgvXPdx+//7/2vHZfHPvXqw/fFh7RiffDofx64cP0dQe0sFFKfF4cTGWhsPaUzop0b9z/ffFRXz/4EFcllJ7yq0NIuKv8/Peveuzy8ve9eMmTbnm4LQ7O/05Vf/aWl2Nn5aWas/o5LfT0/hxfz+mPfqIv15YiD/X1uK7xcXaUzrZPDqKn4+Oas/o5PHiYvyxthZfLVx7J7tTrkqJH/b34/fT09pTOtlYWopfVldrz+isbZq5d8RrT83s82/58pom2vl/7500iIhpKb1639NSYtDDdx3Rv3NdIqLt2bsupUTp2Zn+T5/e8234TR4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARITeYDERB4gMZEHSEzkARJrSilzH756927+wzvq02wWl7NZ7RmdtE0Tw7atPaOz8+k0Ztecn7vo/mAQ9wf9utvMSonz6bT2jM6GbRtt09Se0cnlbBafetaPiIhXjx7NfdHXRj4i+vUFR8TGwUG8PjmpPaOT56NRbI/HvfogJldX8WR3N95OJrWndLK5vBwvV1Zqz+jkzdlZPN3bi489Cn3bNLEzHsez0aj2lE62jo/jxeFh7RmdlfX1ufHo15UGgE5EHiCxm36uAaDH3OQBEhN5gMREHiAxkQdITOQBEhN5gMT+Af1+spSBMgUIAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "The RGBA image on top of the background image:\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Basic Optimization Without Transparency\n", + "\n", + "Below we'll start off by performing feature visualization without any sort of transparency." + ], + "metadata": { + "id": "U44pk7xERQ10" + } + }, + { + "cell_type": "code", + "source": [ + "# Set channel optimization target & render visualization\n", + "loss_fn = opt.loss.ChannelActivation(model.mixed4d.conv_3x3_reduce, channel_index=139)\n", + "image = opt.images.NaturalImage((320, 320), channels=3).to(device)\n", + "img_channel, _ = visualize(model, loss_fn, image, n_iter=512, lr=0.02)\n", + "\n", + "# Set neuron optimization target & render visualization\n", + "loss_fn = opt.loss.NeuronActivation(model.mixed4b, channel_index=373)\n", + "image = opt.images.NaturalImage((200, 200), channels=3).to(device)\n", + "img_neuron, _ = visualize(model, loss_fn, image, n_iter=256, lr=0.01)\n", + "\n", + "# Show both visualizations side by side\n", + "img_neuron = F.interpolate(img_neuron, size=(320, 320))\n", + "img_no_alpha = torch.cat([img_channel, img_neuron])\n", + "opt.images.show(img_no_alpha, figsize=(10, 5))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 367, + "referenced_widgets": [ + "370a9f4d87814515a51144d26a9ca8b3", + "fbec190edc884c0aa2342d4c278bc7c6", + "11f67942024d4e3098a9e7d88b0b144d", + "58498c78f5a046a8853c954d6bcb264f", + "2db7e08b9242423c85928c537e7f300d", + "05f2bd3ad5f14f698bef478c33eeb2b1", + "7efa32283f78475c994a3c20011f017d", + "97e90f93bdff4cdb84ed7616f9b2fa08", + "73adf96fa6c84b608c2a6927a5347414", + "4afd2911641f44278eeb8dae71721be8", + "a6fa5361b97d4790a7ed78c928612fd6", + "a98966a99b5b41bc8559e8046b96969f", + "3f4c72541ad84ff0b05071d020cd2f0a", + "5de4bf65e4cf4492aa2f35bb7bcd5167", + "c0fcfadc6d1e4596b9b3a88f1e6d0a0f", + "31fe21e26c214532aeb4844f009e92f0", + "cf4af50e246443a8832eb622bd2b0ddb", + 
"c615948ca593466fb2602d98da4fb5ef", + "911b842b1d374479b06b272674dee5d1", + "8520b5deb27740d997b4a4a05fe6e493", + "cdd0fd17c90c4a6a9c51036ddf9cde78", + "9f56ee00c73141fb8294ee315a94f718" + ] + }, + "id": "UNnYd0cEtOHN", + "outputId": "76811ff5-48ff-4d42-81d1-0faf56aceaa6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "370a9f4d87814515a51144d26a9ca8b3", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0/512 [00:00" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Looking at the above flower and car tire visualizations, we have no way of determining the importance of each part of the visualization. For example, we cannot easily tell what part of the flower is most important or how important the car body and ground are for tire detection.\n", + "\n", + "This limitation of feature visualization may seem like something unavoidable, however it can be overcome with some clever design!\n", + "\n", + "**Optimizing Additional Degrees of Freedom**\n", + "\n", + "* Feature visualization can yield a ton of information about a target, but by default is unable to work with some of the additional degrees of freedom that targets can have. One such area is the importance or saliency of each part of the visualization. In the case of a model trained on 3 channel RGB images, we can view this additional dimension by adding a 4th channel for alpha transparency to our image parameterization. " + ], + "metadata": { + "id": "NJvEZRQcSCr6" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Alpha Channel / Transparency\n", + "\n", + "**Optimizing With The Additional Alpha Channel**\n", + "\n", + "* Using the 4 channel RGBA image parameterization allows us to see the feature importance based on opacity. The more opaque something is, the more important it is. 
The more transparent something is, the less important it is.\n", + "\n", + "* The optim module has been designed so that using RGBA images is just as easy as RGB images. For example, `NaturalImage()` handles RGBA images without any changes, other than being initialized with `channels=4`.\n", + "\n", + "* To render a 4 channel visualization using a model that only supports 3 channels, we can use Captum's `BlendAlpha()` on our model input as the final transform. The `BlendAlpha()` transform performs [alpha compositing](https://en.wikipedia.org/wiki/Alpha_compositing), which turns the 4 channel RGBA image into a 3 channel RGB image." + ], + "metadata": { + "id": "7GB_ASIOafYx" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Basic optimization with transparency\n", + "\n", + "\n", + "For basic optimization with transparency, we use a simple self-balancing equation that avoids producing too much transparency or too much opaqueness:\n", + "\n", + "```\n", + "loss_fn = LossFunction * (1.0 - mean(alpha_channel))\n", + "```\n", + "\n", + "The above equation's alpha channel portion can be performed by using Captum's `opt.loss.ChannelActivation` objective with a channel index of `3` (the fourth channel) for the alpha channel and `opt.images.NaturalImage` as the target. This is demonstrated below." 
+ ], + "metadata": { + "id": "sSknEhony0hd" + } + }, + { + "cell_type": "code", + "source": [ + "image_size = (320, 320)\n", + "\n", + "# Initialize NaturalImage with 4 channels\n", + "image = opt.images.NaturalImage(image_size, channels=4).to(device)\n", + "\n", + "# Set optimization target\n", + "loss_fn = opt.loss.ChannelActivation(model.mixed4d.conv_3x3_reduce, channel_index=139)\n", + "\n", + "# Use NaturalImage output as target, and collect alpha channel for mean()\n", + "loss_fn = loss_fn * (1.0 - opt.loss.ChannelActivation(image, channel_index=3))\n", + "\n", + "# Blend the alpha channel into the image as our final transform\n", + "transforms = [opt.transforms.TransformationRobustness(), opt.transforms.BlendAlpha()]\n", + "\n", + "# Render the visualization\n", + "img_basic, history_basic = visualize(\n", + " model, loss_fn, image, transforms=transforms, n_iter=512\n", + ")\n", + "\n", + "# Show visualization on multiple backgrounds\n", + "# The backgrounds are as follows: No transparency, checkerboard, white, & black\n", + "opt.images.show(create_mosaic(img_basic), images_per_row=2, figsize=(14, 14))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 824, + "referenced_widgets": [ + "f7c74f1afcc044d089932873da46fb0c", + "550cd2bd52134286b76bccbeef7abcb1", + "8cb148d2cac34c0dacac2470bf1e9425", + "c86de569236942e49689347e283dca4c", + "c57371c34d724c24beb4349ed2d537c7", + "969e4090581846f69cd6bf3bf8ad89a4", + "4bffb6e24fd04f9bb81df1458ef1591c", + "0de0fbbd2d194cd386a0bd2b018828cb", + "767f518665f34f4ebf5f9498ff2c9f19", + "46e8522957ac45129b3aee66cdc47f08", + "f06233ce85924fcb8bba14228f4325ef" + ] + }, + "id": "c6eh8j7Jyz-n", + "outputId": "892702f7-6b67-481c-c2e5-910bfe7b05a2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/512 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": 
"markdown", + "source": [ + "By placing our rendered image onto different backgrounds, we can clearly see the varying degrees of transparency throughout the image.\n", + "\n", + "While this naive strategy works pretty well, the channel visualization features are positioned all over the rendered image when using the `ChannelActivation` loss objective for model targets. In the next section, we'll demonstrate a potential improvement by using a custom optimization loss objective.\n", + "\n", + "We can also see that the optimization process is working well with our setup, by using the `plot_loss` helper function on the `history` output of `InputOptimization`'s `optimize` function." + ], + "metadata": { + "id": "E4Jr_QUw-xPk" + } + }, + { + "cell_type": "code", + "source": [ + "# Plot loss vs iterations\n", + "plot_loss(history_basic, title=\"Basic Alpha Channel Optimization\", figsize=(8, 5))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 350 + }, + "id": "N4VUvsoQ-wj-", + "outputId": "f444d5c9-6d59-44b6-d10b-6a8fbccbd498" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAf0AAAFNCAYAAAAKBrb9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOy9d5xcVf3//3pP374pm2wqSUgjoSf0okgv0hS+KNJUENQPKn70B2JBqYp+LEiRqjSRIoKEGmkSUkggpJLeNtkkW7Jt+r1zfn/ce+6ce+fO7Gyb2fJ+Ph77yMy57dyZyX2ddznvQ0IIMAzDMAwz+PEUuwMMwzAMwxQGFn2GYRiGGSKw6DMMwzDMEIFFn2EYhmGGCCz6DMMwDDNEYNFnGIZhmCECiz7D5ICIXiOiK/rgvO8S0Td7e9+eQkRXEtEHhbhWVyCivxLRbQW+5gNE9LNuHvsTInq4P/WJYQAWfWaQQERbiShKRB1EtI+I5hHRhJ6eVwhxphDib93sExHRZiJa09N+9CZEdDoRvU9E7UTUQETvEdG5xe5XTyCi8UT0FBE1EVGYiJYQ0TldOD5jsCOEuFYIcWt3+iOEuEMI0aOBWm/3iWEAFn1mcPFFIUQ5gDEA9gC4p8j9ORHAKABTiOiIIvcFAEBEXwbwHIDHAYwHMBrAzwF8sZj96glENBzABwASAGYDGAng9wCeNu+XYRgTFn1m0CGEiAF4HsAs2UZEZxPRJ0TURkQ7iOgWZVuIiJ40rcQWIvqIiEab22yudSK6mojWmlbyGiI6PEdXrgDwEoBXzdeumBbdAiL6MxG1EtFnRHSyY7f9zH3aiehNIhqpHP8cEe02j32fiGZnuQ4B+D8AtwohHhZCtAohUkKI94QQVzv2/a3pMdlCRGcq7Vcp97+ZiL6lbPs8EdUR0Q+JaC8R1RPRVcr2vxLRvaYXpp2IFhPR/sr2mUT0FhE1E9E6Iro4x2er8gMAHQC+IYTYLYSICiH+DuB2AL8z7xtEJIjoerPfjUR0NxF5iOgAAA8AOMb0FLUo/b3NcW8/Vu7tfCI6i4jWm33+iXIvtxDRk+brP5vnlX+a/P0R0Y1EtEn5PV1gtnfaJ/P91US00bz+y0Q0VtkmiOhaItpg/q7vlZ8FM3Rh0WcGHURUCuD/AVikNIcBXA6gGsDZAK4jovPNbVcAqAIwAcAIANcCiLqc9yIAt5jnqQRwLoCmHH34MoCnzL9LiCiQo9tHAdgEw0r9BYB/mhas5KsAroLhOQgA+F9l22sAppnbPjav58YM8x6fz9EP2Zd1Zl9+A+ARRSz2AjgHxv1fBeD3joFPLYzPchyAbwC4l4iGKdsvAfBLAMMAbIQhzCCiMgBvAXjavI9LANxHRLPQOacCeEEIkXK0PwtgIoDpStsFAOYCOBzAeQC+LoRYC+M7XyiEKBdCVGe5Ti2AkHlvPwfwEICvAZgD4AQAPyOiyc6DhBDfNc9bDuB4APtgDAYB4zs/AcZn9ksATxLRmHz6RERfAHAngItheLe2AXjGsds5AI4AcLC53+lZ7o0ZIrDoM4OJf5kWUSsMIbhbbhBCvCuEWGlatisA/B3A58zNSRhiP1UIoQshlgkh2lzO/00AvxFCfCQMNgohtmXpy4UA4gDeBDAPgB/GYCMbewH8QQiRFEL8A4boqvs/JoRYL4SIwhCzQ5V7e1QI0S6EiMMYlBxCRFUu1xhh/lufox8AsE0I8ZAQQgfwNxiCMtq81jwhxCbz/t8z7+8E5dgkgF+Z9/EqDAt8hrL9RSHEEiGEBmNwIu/jHABbhRCPCSE0IcQnAF4AcFEnfQWMwYnbPdUr2yW/FkI0CyG2A/gDgK/kcX5JEsDtQogkDHEdCeCP5me/GsAaAIdkO5iIagD8C8D/mPcHIcRzQohd5u/yHwA2ADgyz/5cCuBRIcTH5nd/EwzPwCRln7uEEC3m/b4D5XfDD
E1Y9JnBxPmmRRQC8F0A7xFRLQAQ0VFE9A4ZiWutMKwoKQZPAHgDwDNEtIuIfkNEfpfzT4BhmeXDFQCeNQUsBkPAcs0C2Cnsq19tAzBWeb9beR0BUG7el5eI7jJdxG0Atpr7qEInkV6JMZ303bqWECJivpTXO5OIFpnu5BYAZzmu1WQKekZfc90HgP0AHGW6oVvMc18Kw7rujMYs9zRG2S7Zobx2fsad0WQOhIC0J2iPsj0K+71amL+n5wE8LYR4Rmm/nIiWK/d8INy/OzfGmvcAABBCdMD4jscp+2T7vJkhCos+M+gwrfV/AtBhuFMBw238MoAJQogqGPFSMvdPCiF+KYSYBeBYGFbn5S6n3gFgf5d2G0Q0HsAXAHyNjFj7bhiu/rNIicU7GOeIt04EsKuza8Fw+58H4BQYLuJJshsu+64z7+FLeZw3AyIKwhi8/BbAaHOA9WqWa3WVHQDeE0JUK3/lQojr8jh2PoALicj5PLvYPO96pU2d0aF+xn293Og9ANoA/FQ2ENF+MEIE3wUwwvw8VyH9eXbWp10wBkvyfGUwvDk7e6/bzGCDRZ8ZdJDBeTDixmvN5goAzUKIGBEdCUMs5f4nEdFBROSF8WBOAnDGhwHgYQD/S0RzzGtMNR/cTi6DITQzYLhTD4URV65DdnfyKADXE5HfzB04AIagdkYFjDBCE4BSAHdk29H0JNwAI/Z8FRFVmolsxxPRg3lcKwAgCKABgEZGgt9peRyXD68AmE5El5mfgZ+IjjAT2jrj9zAGPI8QUS0ZiZlfAXAzgB85PCg/IqJhZEzn/B6Af5jtewCM7yTvoluQkez4OQCXOvIOymAIe4O531UwLH1JZ336O4CriOhQc0B2B4DFQoitvXwLzCCCRZ8ZTPybiDpgCPftAK4wY60A8G0AvyKidhhJWM8qx9XCcL22wRgkvAfD5W9DCPGced6nAbTDiM8Od+4Hw41/n5lJbv3B8C5kc/EvhpGM12he48tCCNckQQePw3Dx7oQRU16Ua2chxPMwkhy/DsNS3APgNqQTy3Id2w7gehif3T4YA6eX8+hjp5jnPg1GAt8uGG7pX8MYZHR2bBMMj04IxmfQBGNwc5kZJ1d5CcAyAMth5Fo8Yra/DWA1gN1E1Ije5SsApgDYpWTw/0QIsQbA7wAshPE9HARggXJczj4JIeYD+BkM70s9DC/UJb3cd2aQQfZBMMMwhYaIrgTwTSHE8Z3ty3QfIhIApgkhNha7LwxTLNjSZxiGYZghAos+wzAMwwwR2L3PMAzDMEMEtvQZhmEYZojAos8wDMMwQwRfsTvQ14wcOVJMmjSp2N1gGIZhmIKwbNmyRiFEjdu2QS/6kyZNwtKlS4vdDYZhGIYpCESUbU0Qdu8zDMMwzFCBRZ9hGIZhhggs+gzDMAwzRGDRZxiGYZghAos+wzAMwwwRWPQZhmEYZojAos8wDMMwQwQWfYZhGIYZIrDoMwzDMMwQgUW/m0QSGt5YvRs7W6Ku2/e2xdAcThS4VwzDMAyTnUFfhre3iSV1fLChEXe+thabGsKorQzhB6dOw/mHjcP2pggiCR33vL0R89fuwSETqvHrLx2EqTXlmL92DwI+D6bWVGDcsBJ4PVTsW2EYhmGGGCSEKHYf+pS5c+eK3qi9L4TAt55YhkWbm9AW0+D3Er5+3GQ8uWgbwgkd5UEfOuIaAMDrIeip9OeqbgOAmbUV+Mtlc7DfiLIe94thGIZhVIhomRBirts2tvTzhIgQ9HtxxoG1OH12LQ4YU4mx1SX4wanTsWBjo2HJez1YsbMVPz59JmbWVuCwW98CAAR8HpQKL04+YDQmjyzD4wu34nN3v4tj9x+B8qAPY6tLEPJ7cdrs0ZgxugJlQf5aGIZhmN6HLf0+pG5fBDUVQQhhhAWqSwMAgK2NYTy/rA5PLd6GlABao0nrmIPHV+HqE6bggfc24bRZtfjeKdOK0neGYRhmYJLL0mfRLzJJPYVdLVHc/+4mJHWBN
1bvtoUClv/8VGuwwDAMwzCdMahEn4jOAPBHAF4ADwsh7sq1f38XfSetkSQ27G2Hz+vB+fcuwDkHj8HssVW4+oTJ8Hl5sgXDMAyTm0ET0yciL4B7AZwKoA7AR0T0shBiTXF71ntUlfoxd9JwAMDxU0filRX1eGVFPbY3h3HnhQcXuXcMwzDMQGagmY5HAtgohNgshEgAeAbAeUXuU59xw2nTMWlEKQ4ZX4Vnl9bh5U93IZrQi90thmEYZoAy0ER/HIAdyvs6s21QcvjEYXj3RyfhdxcfAj0lcP3fP8HVjy+FpqeK3TWGYRhmADLQRD8viOgaIlpKREsbGhqK3Z0eM3VUBa4/eRpm1lbgg42NWLS5udhdYhiGYQYgA030dwKYoLwfb7bZEEI8KISYK4SYW1NTU7DO9SU3nDodz193LHwewuMLt2LDnvZid4lhGIYZYAw00f8IwDQimkxEAQCXAHi5yH0qGLKQz5tr9uDC+z4sdncYhmGYAcaAEn0hhAbguwDeALAWwLNCiNXF7VVhuezo/QAA7XENr62sR1zjxD6GYRgmPwaU6AOAEOJVIcR0IcT+Qojbi92fQnP1iVPw0OXG9MvrnvoYf5i/ocg9YhiGYQYKA070GeDwidXW63kr6jHQCiwxDMMwxYFFfwAyojyIK4+dhDMPrMX25gieWry92F1iGIZhBgAs+gOUW86djXu/ejg+N70Gt81bg6aOeLG7xDAMw/RzWPQHMB4P4WfnHIBYMoW/fbi12N1hGIZh+jks+gOcqaMqcMoBo/D0ku1IaFypj2EYhskOi/4g4NKj9kNjRwJfvOcDzF+zp9jdYRiGYfopLPqDgBOn1+D4qSPR2BHH1U8sxY7mSLG7xDAMw/RDWPQHAV4P4clvHoWHrpgLIYB1u7lEL8MwDJMJi/4gYuqocgDAxoaOIveEYRiG6Y+w6A8iKkN+jKoIYuNeFn2GYRgmExb9QcbUUeXYwKLPMAzDuMCiP8iYUVuBdbvbEEloxe4KwzAM089g0R9knHrAaMSSKby7rqHYXWEYhmH6GSz6g4wjJw/HyPIAXlu1u9hdYRiGYfoZLPqDDJ/XgxOn12DBxkakUrz6HsMwDJOGRX8Qctz+I9EcTmDt7rZid4VhGIbpR7DoD0KOnToCAPDGai7JyzAMw6Rh0R+EjKkqwemzR+NP/9mAw299Cy2RBACgqSOOl5bvLHLvGIZhmGLBoj9Iue38g7B/TRmawwm8ZS7Cc+2Ty/C9Z5Zjb3usyL1jGIZhigGL/iClpiKI+Td8DmOrQnhjtZHJv6UxDACIJ3kJXoZhmKEIi/4ghohw4vQaLNu2DwCQ0Ayxjyb1YnaLYRiGKRIs+oOc8cNKsC+SRDShI6kbU/g64lytj2EYZijCoj/IGVNVAgCob40iqRuWfiTOlj7DMMxQhEV/kDO2Wop+DFqKLX2GYZihDIv+IGdsdQgAcOnDi602XoyHYRhmaMKiP8iprQpltIXZ0mcYhhmSsOgPcoI+b0ZbOMExfYZhmKEIi/4Q4KI5423v2dJnGIYZmrDoDwHuvugQXHb0ftb7MGfvMwzDDEn6negT0S1EtJOIlpt/ZynbbiKijUS0johOL2Y/BxonzayxXj+6YAv+u6GhiL1hGIZhikG/E32T3wshDjX/XgUAIpoF4BIAswGcAeA+IsoMWDOufGHmaHx44xcwZWQZAOC+dzYVuUcMwzBMoemvou/GeQCeEULEhRBbAGwEcGSR+zSgGFtdgtZoEgCwalcrhBBF7hHDMAxTSPqr6H+XiFYQ0aNENMxsGwdgh7JPndmWARFdQ0RLiWhpQwO7sVWawsYyu+0xDTuao0XuDcMwDFNIiiL6RDSfiFa5/J0H4H4A+wM4FEA9gN919fxCiAeFEHOFEHNramo6P2CIsmpXKwAgrun4yoOL8Mn2fUXuEcMwDNOX+IpxUSHEKfnsR0QPAXjFfLsTwARl83izjekCU0aWYbO5xK5candtfTsWbm7CT
15chde+d0Ixu8cwDMP0If3OvU9EY5S3FwBYZb5+GcAlRBQkoskApgFYUuj+DXRe/M5xWPyTk1EZ8mFvWwyvrazHH+avBwCE/P3u58AwDMP0IkWx9DvhN0R0KAABYCuAbwGAEGI1ET0LYA0ADcB3hBA84byLVJX4UVXiR01FEHvb47juqY+tbSGX6n0MwzDM4KHfib4Q4rIc224HcHsBuzNoGVURQkN73Nbm81KResMwDMMUAvbnDlGkpa8ip/MxDMMwgxMW/SHKqIog9rbHbG1NHYki9YZhGIYpBCz6Q5RRlUHEkilbW3OYRZ9hGGYww6I/RKmpCFqvLzxsHC6eOx7RpI4oL7vLMAwzaGHRH6KMqSqxXp950BgcPtEofNgUjmc7hGEYhhngsOgPUWaPrbReDy/zY3hZAACwL8zJfAzDMIMVFv0hSkXIb70eXhZEeciYvdkR19DUEUcsyW5+hmGYwQaL/hCmNGAU4xleGkBZwBD9SELDBfd9iNvnrS1m1xiGYZg+gEV/CPPyd4/DDadOR2WJD2VBYwCwL5LE9uYI3li9m5feZRiGGWSw6A9hpo6qwPUnTwMRodS09Lc0dgAA9rbHsaa+rZjdYxiGYXoZFn0GACz3/qa9Yavt3XUNxeoOwzAM0wew6DMAgFLTvb/ZtPQrQz6889neYnaJYRiG6WVY9BkAgN/rQcDnwdbGCADgwsPH4+Pt+1DfGi1yzxiGYZjegkWfsSgLeJHQUygNeHH5Mfsh5PfiW08s44Q+hmGYQQKLPmMhk/lqK0OYUlOOG06djhV1rahvjXVyJMMwDDMQYNFnLOS0vdGVIQDAnP2M0rwr6loAAOG4VpyOMQzDML0Ciz5jIS39MVWG6B8wphJ+L2H5jlb846PtmP2LN7BxbzviGlfrYxiGGYiw6DMW5UFD9Eeboh/yezGzthKrdrbi5U93AQDO+/MC/Oi5FUXrI8MwDNN9WPQZC1mWt9Z07wPAxOGl2NUaRVIzkvnCCR3bmiNF6R/DMAzTM1j0GYsy09KvrUqLfk1FEA1tccT1lNUW4dg+wzDMgIRFn7Fws/RrKoJoj2toi6aX3OWEPoZhmIEJiz5jIS39MYqlP6oiCADYuS9dpKeDRZ9hGGZAwqLPWEwcXoqaiiBGlAettlGm1Z9Q3fsJnQv2MAzDDEB8xe4A03/46pET8eU54+H1kNVWowwAJFpKIK6lEPJ7C9k9hmEYpoewpc9YeDyUIeSjKjNFH+C4PsMwzECERZ/JyfDSgGt7JMEFehiGYQYaLPpMTjwewkHjqjLaOZmPYRhm4MGiz3TKI1fMxUkzauD3pmP9kQSLPsMwzECjKKJPRBcR0WoiShHRXMe2m4hoIxGtI6LTlfYzzLaNRHRj4Xs9dBlVGcJjVx2JQ8ZXW20dcXbvMwzDDDSKZemvAnAhgPfVRiKaBeASALMBnAHgPiLyEpEXwL0AzgQwC8BXzH2ZAiLn8QOcyMcwDDMQKcqUPSHEWgAgIuem8wA8I4SIA9hCRBsBHGlu2yiE2Gwe94y575rC9JgB0kvvAmnR/+m/VmJYaQA/PG1GsbrFMAzD5El/i+mPA7BDeV9ntmVrZwpIWSA9RnxzzR4k9RTeX9+IBRsbi9grhmEYJl/6zNInovkAal023SyEeKmvrmte+xoA1wDAxIkT+/JSQ4rhZQGUBbwIJ3S8tWYPnl26A40d8WJ3i2EYhsmTPhN9IcQp3ThsJ4AJyvvxZhtytLtd+0EADwLA3LlzuV5sL3HNiVPwxUPG4px7PgAAfLipCZGEjiYWfoZhmAFBf3PvvwzgEiIKEtFkANMALAHwEYBpRDSZiAIwkv1eLmI/hyQjyoM4cFwV/n710ThsYjVeX7UbABBO6IhysR6GYZh+T7Gm7F1ARHUAjgEwj4jeAAAhxGoAz8JI0HsdwHeEELoQQgPwXQBvAFgL4FlzX6YIHLP/CJx5YC30VNqJwm5+hmGY/k+xsvdfBPBilm23A7jdpf1VA
K/2cdeYPDls4jDb+6ZwAhOGlxapNwzDMEw+9Df3PjNAmDWm0va+sZ0tfYZhmP4Oiz7TLdRCPQDQFGbRZxiG6e+w6DPdZlip33rd2JEoYk8YhmGYfGDRZ7rNWzd8DvOuPx4lfi/2hbOL/kvLd3KiH8MwTD+ARZ/pNiPLg5g9tgrVpX60RpOu+zR2xPG9Z5bj6seXFrh3DMMwjBMWfabHVJX40ZJF9FPmtL7tTZFCdolhGIZxgUWf6TFVJX60RtxFP6GnbP8yDMMwxYNFn+kxudz7Cc0Q+ySLPsMwTNFh0Wd6THVJAC1R90Q+aeEndV4CgWEYptiw6DM9pqrUj5Zs7n3T0ldL9jIMwzDFgUWf6TFVJX7EtRRiycxFd6ToMwzDMMWHRZ/pMdVmkZ7LHlmcEbtn0WcYhuk/sOgzPaaqxBD9j7buw5bGsG1bnBP4GIZh+g0s+kyPCfm81uudLVHbNjdLf1NDB467623sbYv1ed8YhmGYNCz6TI+ZUVthvd65r3PR37CnHTtbotjWzAV7GIZhCgmLPtNjJgwvxaY7zoLfS6jLIfqa6eqPmgl/0URm4h/DMAzTd7DoM72C10MYW12S6d5XYvrhhBR7o80t259hGIbpO1j0mV5jXHUJdu6zu+zVbP5wXAOgWPos+gzDMAWFRZ/pNSaNLMOGPR2IJXUs2NiIix74EO0xzdoejmv4zlMf48H3NwFgS59hGKbQ+IrdAWbwcM5BY/D04u14Y/Vu3P3GOtTti8LnSY8rO+Ia5q2st97Hkjydj2EYppCwpc/0GkdPGYFx1SV4dWU9Aj7jp7Vwc5O1fXerfYpeNKljxk9fwy9eWlXQfjIMwwxVWPSZXsPjIRw6oRor61qxw2U63vo9Hbb30YSOuJbC3xZuK1QXGYZhhjQs+kyvMnVUOXa1xpDUhVWpT7J+b7vtfVM4XsiuMQzDDHlY9JleZdrocuv1GbNrAQB+LwEANjos/foWrsjHMAxTSFj0mV5l2qh0db5JI8sAACG/Fx7KtPSdc/oZhmGYvoVFn+lVJptCf+OZMy33fjyZQkXIDyHs++4yRV8m/TEMwzB9S15T9oioDEBUCJEioukAZgJ4TQiR7NPeMQOOgM+DLXeeBQB4deVuAEZVvtElQbRG7T+XNnMOf2nAC4ZhGKbvydfEeh9AiIjGAXgTwGUA/tpXnWIGNkQEIkJ1aTqRrzLkz7p/WSD72DOVEnjkgy0IxzW8/dkefLipMWOfl5bvxOSb5nGxH4ZhmE7IV/RJCBEBcCGA+4QQFwGY3d2LEtFFRLSaiFJENFdpn0REUSJabv49oGybQ0QriWgjEf2JiKi712cKg5q9XxEyhD3kz/zJ5XLvv7V2D259ZQ1+8/pn+Ppfl+KrDy3O2Oc3r6+DEEBDO88GYBiGyUXeok9ExwC4FMA8s60nPtlVMAYQ77ts2ySEONT8u1Zpvx/A1QCmmX9n9OD6TAFQRZ9gjNFmjK7I2M9t+V1JmxkSaI9rWfeRwz9nzgDDMAxjJ1/R/z6AmwC8KIRYTURTALzT3YsKIdYKIdbluz8RjQFQKYRYJIQQAB4HcH53r88UhirFvd8eN8R7uovox7W0W15PCQhFvbWU8drvyf5TlaKfYtVnGIbJSV6iL4R4TwhxrhDi10TkAdAohLi+j/o0mYg+IaL3iOgEs20cgDplnzqzjenHlCuxernwzozaTNFXa/Dv/5NX8cNnP7Xea+YqfT5v9miOx1R9LcW1/BmGYXKRl+gT0dNEVGlm8a8CsIaIftTJMfOJaJXL33k5DqsHMFEIcRiAGwA8TUSV+d6Mcu1riGgpES1taGjo6uFML+HxpIXaawrzpBFlGftJS19a+P/8ZKe1Lamblr43+09Vin48R5iAYRiGyX+VvVlCiDYiuhTAawBuBLAMwN3ZDhBCnNLVzggh4gDi5utlRLQJwHQAOwGMV3Ydb7ZlO8+DAB4EgLlz57LPt8iMKAvgwcvn4JUV9aitCtm2HTiuE
qt2tkFPCdfYvrTefZ7slr7ckis3gGEYhsk/pu8nIj+MOPrL5vz8XhdTIqohIq/5egqMhL3NQoh6AG1EdLSZtX85gJd6+/pM77Pwpi/g7R9+HlNHVeD7p0zH5JFl8CoCPmfiMACGtd+hJOs9s2Q7hBCWpe/LYelL1Zf7MgzDMO7ka+n/BcBWAJ8CeJ+I9gPQ1t2LEtEFAO4BUANgHhEtF0KcDuBEAL8ioiSAFIBrhRDN5mHfhlEboASGt+G17l6fKRxjqkps78uCPmy64yws27YPLy/faVXwiydTCCuif+M/V2LyyDJolnu/85g+W/oMwzC5yUv0hRB/AvAnpWkbEZ3U3YsKIV4E8KJL+wsAXshyzFIAB3b3mkz/Ys5+wzBnv2F4Zsl2AEDMYekDRuZ+0kzky1WWQToOEjoX52EYhslFvol8VUT0fzI5joh+ByAzI4thukjQLNbjtPQlUbPKXlypticcU/NkDYCExu59hmGYXOQb038UQDuAi82/NgCP9VWnmKFD0GfUeIppOsIJu+hHEjoiCUPs1W3O2D1Zln6me3/j3g4ce+d/sKeNl/FlGIbJV/T3F0L8Qgix2fz7JYApfdkxZmgQUix9OZdfEo5rVj39SDxt6TvFnXLE9B9bsAW7WmN4fdXuXu03wzDMQCRf0Y8S0fHyDREdB4AXQ2d6jLT041oK4bg9Jh9OaIi6WPpxx8I6VkzfRfSFYx+GYZihTL7Z+9cCeJyIqsz3+wBc0TddYoYS0tKPJfWMmH4kriMiLf1EWuidRXjImrLnIvpm/J/XZ2IYhsk/e/9TAIfI6nhmoZ7vA1jRl51jBj+qpe/M3g8nNMRMsY/mEn1kd+/LnD/WfIZhmPzd+wAMsRdCyPn5N/RBf5ghRtCX3dIPxzVEkkZbWBF9p7h7ciTyyUV4UinO7GcYhsnXve8G205Mjwn5DUs/mtCxtz1u2xZO6JaFH1Vj+mat/oWbmlBZ4lPas1v6aniAYRhmqNIT0WfTiekxcvndW+etycjej8TVRL5M9/5XHloEAJg1xliTqS2axKQb5+GnZx+Ab55gTC7RTdVn0WcYhunEvU9E7UTU5vLXDmBsgfrIDGIqgj4EfR6b4G+962wcMKbSsPSTmTF9p3tfLsqzu4WRdSUAACAASURBVNWYi//oB1usbXLKXzTJos8wDJPT0hdCZC5+zjC9CBFhVGUQO5qNGaA3nDodAFAW8KIjplkWftjFvS+R9fllpr+6pG+HOQ3QrdofwzDMUKNLiXwM0xeMqjCW2z3roFpcf/I0AEBp0IfPdrdZVr1aeTeeTEFXEvOSpqXfHksCgG0Vv4gp9lF27zMMw7DoM8VnVEUQADC6MmS1lQW82BcxRHyYGfeXJPSUvSyvWXO/LWqKPqmWvrEfx/QZhmFY9Jl+gBT9WlX0g0bkyechzB5bZds/nkyhQ8kBkDH91qjRps7Jl2If4Zg+wzAMiz5TfEaZYl9blRb9clP0p42uQGnAa9s/7liGVy7AIy39vW1xPLt0B4B0LD+a4Jg+wzBMT6bsMUyvUFOeaen/vyMmAABOmzUaTy3Zbts/rqWs+L3x3rDiZXGe9riGHz+/AqfPrrXCAOzeZxiGYUuf6QccO3UETp01GrPHpd34B4ypxC3nzsaxU0fC71gt57Z5a/Gl+xda72PJzKI8gJG8J7f1RSLf1sYw5t72FnY0R3r93AzDMH0Biz5TdMYPK8VDl8+1XPpO/N7u/UwblAp/fWHpb9zbgcaOBDY3hnv93AzDMH0Biz7T7/Epov/WD07EC9cdm9dxjR2G6If8HoQTGjQ91as1+K3QAdcAYBhmgMCiz/R7/N60e3/iiFJMHVWe13FN4QQAYERZENGEjvPuXYA/v7Ox1/rF0wEZhhlosOgz/R7Vve/3eFBV4s+xd5om09IfVuaHlhLYsLcD25p6L/4eMav9RXhmAMMwAwQWfabf4zMtfb+XbCV2O6PZtPSHlxmzAxJayqrF3xuwpc8wz
ECDRZ/p9wRMS1+1+J3Wvizwo9LYId37AatNXXjnhN+8jXt74O6XNQDCLPoMwwwQWPSZfo/PY/xM1fK67/zv5/HYlUdY78cNK8k4rjlsuPeHK6KvWvo7mqO4+411Wa/bHkvaavw7kWLPiXwMwwwUWPSZfo9075colfmGlwUwe2yl9X5cdaboN1nu/UxL37k8rxMhBA665U3c+MKKrPtIS59L/DIMM1Bg0Wf6PdK9P6WmzNauDgLcLP0mN/d+Ir+ldmV1v+eW1WXdxxL9blj6e9tjOPiWN7CyrrXLxzIMw3QXFn2m3yNguNj3r7FP1SsLpIv5jB9WmnFck4t7P25a+OFOMu7lfpQjb7AniXzvfLYXbTENf/1wa5ePZRiG6S4s+ky/R06zc4q+x0NWvX63RD5Zgne4q6XvLtQLNzXhsF+9aXkJcs0VsFbwyyL6HXEND7y3yTUvoN1cJbAiNPSWvxBCdBpeYRimb2DRZ/o9sjzvweOrMrbNnTQMQHp6nhtuMf2OLC75u9/4DPsiSayoawEAeHKY+pZ7P4vX4L/rG3DXa59hbX1bxjYp+pVDUPTvfWcjpv/0NbQpiyYxDFMYiiL6RHQ3EX1GRCuI6EUiqla23UREG4loHRGdrrSfYbZtJKIbi9Fvpjh8/5Tp+OtVR2DupOEZ2777hanweghHTR6OR6+ci7MPGmPb7vMQKpXpfTJ7P5tQy2V6U8L4V0sJTLpxHlojmQLVmXtfhgjiLlatPDboT+clLNvWjPPuXWCtGjhYed7Mk5DeFIZhCkexLP23ABwohDgYwHoANwEAEc0CcAmA2QDOAHAfEXmJyAvgXgBnApgF4CvmvswQoCTgxednjHLdNrO2EpvuOAtTasrxhZmjMUvJ6AeAEr8XpUrCX1wz6u9nS+STbuekZnfJ17dFM/btzL0vz+Xmym6NGoOIuJL5f/OLq/DpjhZsKcICPrk+k95Gek/kwIphmMJRFNEXQrwphJBPmEUAxpuvzwPwjBAiLoTYAmAjgCPNv41CiM1CiASAZ8x9GcaGc0GdUMCLkM9ra4tpOjqyxPSTZta+0/XsdPMLIdIL7mTxGsTNc8mZACpyBcCYMiDQzL57c2UP9hHzVtbj6Dv/06sVC7Mhb0+w6DNMwekPMf2vA3jNfD0OwA5lW53Zlq2dYWzoDiEJ+T3weAglihs9lkxld++nDBGWMXeJU4ajSR1CGAKWLSkwl6UvRT+qeAlyFQJysmhzU6/W/K9vjaI9phXE2pcDKJexEMMwfUyfiT4RzSeiVS5/5yn73AxAA/BUL1/7GiJaSkRLGxoaevPUTD/HqZvSyldd/NGkbkvke3fdXtzy8moAaYF2ir7mOLGcGVBV4kc0qbsu2Su9Bq6iby4GpJYFlqLv5hlQ2doYxiUPLsIvXlqdc7+uIHMZnPfZF3jN9RO0FKs+wxSaPksdFkKckms7EV0J4BwAJ4u0n28ngAnKbuPNNuRod7v2gwAeBIC5c+eyD3EIIcV3eFkAzeEEYmZSXEnAC5ih8mhCt1m0Vz72EQDgxjNnWuLX7nDvO4Vbvq8u8aMlkkRM01Ea8Lnuk9DtngBNT1krALqKfifT2XbsM6Yw1u3LzDPoLvLayQKY32Ra+prO/zUZptAUK3v/DAA/BnCuEEJd6/RlAJcQUZCIJgOYBmAJgI8ATCOiyUQUgJHs93Kh+830f6QVOXmkUb1PZoirhXxiSd3VJV+3L4JkFkvfKYZSmCtCxsyApxZtx0dbm133cYp4Q0fc8kioiXzS8pX7P7FwKybdOC/D5W5VGiwPoLfQzPsrhBDLhRILMcBgGMZOsWL6fwZQAeAtIlpORA8AgBBiNYBnAawB8DqA7wghdDPp77sA3gCwFsCz5r4MY+PqE6fgymMn4SdnzQSQzqxXS/Yaop8Zu97RHLVc685EPqfLXVrvlSXGYOKO19biqUXbXI9Rp+ztbY9h3op6673d0jf+ld6Gh/67xTwmbjuvXFNALS/cU5Kpw
rn3ZUy/szAGwzC9T1EqgwghpubYdjuA213aXwXwal/2ixn4lAd9uOXc2daUOIkzpu9Whnd7c8SyPjMtfbsYSiGvCBqWvhB2AQfcLf0rHv3IKtYzsjzgSOSzhwN8HrK1S+Tqgc7lhXuCbol+3wtx2tJn9z7DFJr+kL3PML2OUxBnj63EjNEVAIBrHl+G+tZYxjHbmyOW290Z009miemrZXSjZnLfpzta8OGmRldLf3NDh/V60ogy6xggbWXLc8tQhbO4j8z8702jPFlA976cs+f8TCXRhI5NyufEMEzvwaLPDFp+dd5sPP3NowAAN589C/deehgAwyL/ZHsLRlfa6/Vvb06nl7R1MaYPADHTaj/v3gX46kOLXS39icPTCwPVVoVs8+LT2fvmfH1T9FVvwF/e24Rnl9aZfUxi/Z72XB9B3kixL0ScvbOY/lOLt+GL93zQpSmMDMPkB4s+M2i5/JhJOHbqSOt9Zchu/Z/lKNm7W7H+nZZ+e1yzibe04lVLP6Zlce8r4jZKGWiUBrzuom8e5zeXFFYr/v369c+s148v3IbTfv9+r4ij9DLke66eFNbxdhLTb40mEUnonOjHMH0Aiz4zZBhVGcIz1xyNi+eOR8DrwamzRtu272pJT4Fzxpt//PwKnPb796z3cdMtr9b1jyY6j+mrAh7ye1HfGsPht74FIC240YSGs//0X6zc2ZpxzLH7pwcxzuv0BJm9n0+c/Y3VuzH5plexral75YJlIl+2a8nBQCGSCgHg/nc34Zf/Hjh5wXpK4PGFW3mlQqZbsOgzQ4qjp4zAzWfNwgvXHYvR5rK8gJEJ35RjpT4A2NqUdv+7WfrORD65cI76cA7HNRw+sRpLfnKyVSVQrhAoRW5NfTtW70qvzBdNpkMNCS2Fo6cMx0hlul6viH4XEvke/cCYVaCGQ7oCdeLet0INBRK1X7/+GR5bsLUg1+oNnlu6Az9/aTX+8t6mYneFGYCw6DNDjqpSPw4aX4X9lPj6JHNef75IoVWXxnXWrbcW1VHc/h0xDVNqyjGqMoSQUhpYUwRw+Y4W23lUSz+u6Qj6vFa8HwDies/r5UvR/+P8DXj5010595ViH3SsaZAvaUs/m+ibXoduziRojyVR39p7hYv6G/vMFR+zLQ/NMLlg0WeGLD6vx8roHz+sJK9jNEdpXTWRz+nelw9n1RLviGsoDxoDBTUurk4RlFP63M4b11II+T3wedL/dZ2W/p2vrsW76/bmdT8SeV9Lt+3D9X//JOe+cuZDdz0MsuvZjrdqBnRzJsHZf/oAx9z5dreO7Qmf7W7DjS+scC3J3JvI78rnLfzCTMzAh0WfGdL87etH4hvHT8bRU0bktb+smR/P4t5XhVyW2pWhACGETfR3KYmDTWF7AR4Vp+gHfV74lQe+c0rfE4u24T9ruyb6+c6ZV+/PWV44XzqL6fe0OmB3ww495VtPLMMzH+3o8+vLQZE68GOYfOFfDTOkqa0K4WfnzMLI8nRWveqyd7LbYeWqMwJSwi7Actqf3Dea1JESQLl5fnXA4Ky6pxJRwgbxpI6gzwOfN7ulH9dStpBCPjgLAGWjsSOd99BdS586de+bMf0BtiBPyhwQOZdh7m3kd+Xz9Ow6Fz+wEF+6/8Pe6BIzgChKRT6G6W9Ul6bFu6rUnzFPX+IU/QrHAMFZyQ9IDwRkDLbMtPR/dPoMJPUUnly03Sq440Y0oWNvewzDSwOGpe/32B74qvhqegp6SlirAOZLvpnyahzZ6WHoKtlEv6fu/e4ihLAGJN1BjlH6WPOtz0Ud+HWHJY61IpihAVv6DANjtTyJP8fD1BnPrnDM/d8XyZwBIPftMAcEFabolwZ8uOCw8QCQU/QbO+L4/N3v4rlldYglddO9r1j6inhKIe6qpZ/vnHj1vN0V/VQnSwinpw8W1tLv6RTBntQu6AoyLOIfgjH999c34KrHlhTssx6MsKXPMLCX7Q3kEP2WSAL//nQXfj9/PbweQsBnWN1SM
Jpdpv1JcZMr+0lLH0h7CnKJ/uaGMCIJHRv2dKQT+bzuln5a9LsmmPkW5YkrHoTuuvfltMCk5n5NKWqFmqcviSX1nAO+bDy1eBuGlQasssh9XUmwt9z7A5Fv/m0pEnrK/H+Q3+yRDzc2wu/z4IhJw/u4dwMDFn2Ggb3ITsBnPPgrQr4Md31bTMNLy40pbUFzv6DPA81MttvnJvpyud64kc1froi+HADsbMk+xUwWwdnZEoGWEoalryRxqda3vFa8i+59t6S699c3oKE9ji/NGW+1qdMSuyv6UhSzxvRTMpGvsJZ+XEuhohvH3fziKgDAqAojL6SvBysy/OEdgqIvfxtdMfS/+vBiAMDWu87uiy4NONi9zzCAZTX8zxemWg+UYaWZS9e2xzS0mC58OTgIKhaHnKanIq1uaemrom9l8ucQ/bA5oNjWJOfHe2wPfLulr9v+BYC9bTFLrOetqMc7n2Vm9rsV5bn80SX44XOfut4L0P2lcbXORN9aB6Cw7vZc3pH31zd0OvdfXq2vLX05GBqKaxPIWy7EapCDFRZ9hjHZetfZ+OFpM6xktWGlmUvXtseSaDZFX2pKyJf+b/STF1dmHCNFWU7hU5MGy8wlf3e12Ff9c4vXqqLvyzJlz829f+Qd/8E1TywDANz37kY8YlbUU8k3ac4m+j209LMNGqwV/3r4YO+qKDqLK6n9ufzRJfjqQ4tzHp/qQlXDnpCunjj4Rb8tlsSCjY0Z7az53YdFn2EcSJd+dVZL37Dm5fz5YCexRSmOG/Z2IOT3YGx1uhCQz+tBid+L3W120VenEEpkmd+g35HIp4q+6daXAia3vb++wTqHm7jlKyC9697PFtN3n6d/zj3/xcP/3Zz3dboqitlCIjvMefc79+W29OWUvb639Hv3On1dTKg7vLR8Jz7c2Ihrn1iGSx9ebFW3lLCl331Y9BnGQYcZe7947gSbVT6iLIDWaNJy70tLNejL/G9Uobjw5X7r97Rj2qiKjFisnLevJma5ib4k5Jyyp7u592VIwZ6TEEvoGasBAvnHz3vDvW+JfpZBQzb3/6qdbbht3touXydfss142NJo5FTUVGT/TgAULJHPynnopev0dOplX/DH/2zAk4u3YY1ZndL5mfan0EZrJIlj7vwPVta1FrsrecGizzAO5Bz3mWMqsPznp1ntNRVB7GyJwvm8cRP9A8ZUWq+lRbxudzumj85MFZNxfXUGwQhzQZ1DJlRn7O+csqdaqE73vrM+ezSpZ5QLBgpr6XcW03fL3u+ONdpVUcxW20CK/ujK3KIvBqil71woqj+gpwQ0XUDPcq96P5qyt3BzI+pbY7jn7Q3F7kpesOgzTBaGO9z7NRXBDDcjYLjbndb7AWPS4h5N6tjVEsXe9jhm1JZnHC+n7dlEv8wQmBOnGUvpqpa9M6bvaumbD/K2mL2/hns/U9xyxfTVhDg5mAh4Pdbr+Wv2YNKN8/Je5KazmL7bPP3uWKO9ZelvNkW/s2I48mPq8+z9Xl56OFsuQzHRdIGUEJa4O935hS7clAv5vfd1JcbegkWfYbKgijCQ3b0b9HkwqiKIV/7neKtt1ljD0q+tDEFPCTz4vhGLnqis7CeZMMxoq1CuJ8MKPo8HH918Cl793gnK9bxZF9yRVr9l6StTDlNmpT43ccsVI1XFRR5bWeKzrvv0ku0AgNU72zIPdj1f7uI7VqKa8mCPJLq+olw+cV91QJPN0t9uJlA6QyXO4wsV05f9zLd0cmf0V0tf/gGZIp/qR5a+7MkA0XwWfYZxctKMGgCAx2G9Zxd9L0J+Lw4cV2W1nTF7DK48dhKeu/YY7F9Thr9+uBWAe6x+So2xrG+JP/3fUWb1CwjUVARtg4Wg32PL7k+4xNnjWspa4EciBwJdde+rVrYUnPKgzxoAyAdzvvPG9U6m5Lll73dHmPIRX7cBjRM54Ih08rmlCmTpy34OZktfFwJaSlji7hwg9qeZC/1o/JEXLPoM4+DBy+di9S9Pz
2gfVRFy3f9z00fi1FmjAQBz9hsGwEjOu+Xc2ZgwvNQ2GHAT/f1rDJd/izLHXw445LNNrT4W8tnDCTb3ftI+AFBFXwpnzMVVnstd6qwDEPB5EPR5rXb5YM5X9POdp5/QVSu868KUjwtY3SdbCEH21239evUe5CCjtyzwbFiWfpb7E0Jgw572LpyvH4q+KfiWpe8Q+VRKIBzXcP+7m7rkWUnqKddBb08Qpq0/UCx9rsjHMA78Xo9rOdaR5ZlT+ADgsmMmWa//etUR2NIYtglgbVV6sDDC5RzS0m/sSOCsg2rx6srdIBjHuxWYMSz9LO59xVqNJVO2ioJS9PWUQFJPwe/14L53N6KmPJghwPY4vrrKXwpBnwcBn8cabEjhzFf05SAhWyKgW0U+Nyu7M/IRA3Ulv3gW8UtkmQkB2EsJOz+PviLWiaX/wsc78b/PfYrHv34kTpxe0/n5uli9sRBoesqM6xvvnb8VLSXw2zfX4bEFWzF+WAm+eMjYjHPUt0YzBuqXPbIYizY392p1PvlfRf6f7e+w6DNMnpQGjP8uoyqC2Nsex9RRbkl5fhw83p5xX1uZfvCo1fgkU0xLf0RZAPddOgcAcM9/jExgt9hl0GefshfXdAghsN6sza+2S+uUyO7Wl3Xmn19Wh4nDSzMEUnW9OwcVIb/XEH3NXhkuX0sn7+x9pQ9q31fUteCjrfvwjeMnZxyr3kc+LuB8LH3Zz0hCRyolbGEft2TEvo43x5P2z93JJ9v3AQC2NoVxIjoX/d62fHsDXXHtA5nfpZ4SaDKXeXb7He1ujeGYO9+2SiNLFm3u/ZUFrZ4NDM1n9z7D5MvwMsNK//bn98eKW06zJe7lYoxi6bst3Voe9OHPXz0Mj151hNXmdO+rBH1eWyZ5XEvhvfUNOP0P72NtfdqtG0+mrEQ+IezJcNLqjyZ0ROJ6xkNVjafHHYmCQZ8HAa8i+iJTpHMh3dJZ3ekyez9LTP9fn+zCb17/zPVYVQDyEV/Vm3DbvLVW0p5tH+WziTi8AW6CU+yYfkKZYZENWwJjF1dkLAQypi9x1pHQU8L6jUqvV2skad1Xo1n9cm+Ohay6yvo97a7VLOU1B4jms+gzTGfMNjPx5+w3DPNv+ByuPG4yKkP+vFf5Gl3pngugcs7BYzFOqdQnxwZuuuWsC5DQUthuVo2T1eMAQ1TVOLS6LoC0FsNxLWNaH2B3W7+7bi8m3TgP63a3I6bpCPo8CPrT7v3OFtBxIh/mbtMfgfSCMlqWmH5ST2W9lmp55zMISTqE8w/z12fuowxOnC5+txBFsbP35WeTa8VAtYu5LH0hBJZsaS544Rk9JWy1GZxJn7oQSJi/Ub+XsKM5gkN+9SYeXbAVX3lwEc6/d0HGOXtaefCCexfg1lfWZC1k5Tag74+we59hOuGF6461RNLNpd8ZY6pKOt/JgbTS1DD54ROr8fH2FgT9Hit5CDCEp9G0aKSFAxgWoSroTcq2WNIICUQSesZKgvJYybNL6wAAy7btQzxpLGmqWvrpDOs8LX3z4dse06DpqYz575oVG3eP6Sf1FFLCOI8zj0AV6Lyy9x0P8IkjMqdUJnSBsoAX4YSeIfquln4fxvTfXbfXGshltfTNPuXqherJcUvslKytb8fFf1kIAFj1y9Ndw1N9gZYStgI8zumXMi/FeA3UmSWS31y9G4u3uLvwkz1MsJQLX0WSOiqV32w6pp8fQgh8vL0Fh080woCFHiyw6DNMJ4T83rytejeyJQDm4tKj9sO2pgiu/fz+VttjVx6JT+tajNwC5Yme0FNoMOObDYo785EPtuCfH++03ssYKGBYiwk9BS0lXC3usCKyclZBacCLuKYk8mn2xLV866FrqRQqQz60xTS0RJO2GQ1GLNd4rVrhUZdKgAkthZKA/XtRBx65+vOj5z7FweOrcMz+I23tpYHM71lLpVBdGkA4EbVWSrT64iL6fWnp//DZ9KqH2a4jLeBs0xw/2tqMix5YaL2P5bD01
d9GJKEVRPRTKQEh7IMn50BK09Pu/bimW4O/XCGdng7GPGR4SKIJHZWhdE2Nrmbvv/zpLnzvmeXW+0Iv+VsU9z4R3U1EnxHRCiJ6kYiqzfZJRBQlouXm3wPKMXOIaCURbSSiP9FA8aUwQx6f14OrT5iMx5SYfWeUBLy49fwDbQ+XqlK/lY2tPr7iyZRl4bcrlqgq+ADQFE6LvlqO120qmmrRtkYT5n0QYsl0Il/cYenn81AVwhD1kWaClVzHQJK0ueeVmL4iTFJo3QTXbQqdG++tb8DCzU0ZAwM3b0VSS1mFmpyfldv+fRnTLw/5MHF4KcZUhbJ+3lathiyi/+e3N9re55qyp36ePV3qOF+kha8KuNOjkhLC6k9cS0Ea3rk++3zDT9mQBbGc3h75PeQrSGreTTEoVkz/LQAHCiEOBrAewE3Ktk1CiEPNv2uV9vsBXA1gmvl3RsF6yzA95OazZ+GkGaN67XxqIlZCT9ksfCdfO3oigEz3fjiHhWd3p5sP12TKsvSDvsyY/gsf1+Hfn+6yjvvnx3V4bWW97bxyX2ndq3kGgP2hnXTJ3vd6yHp4u8XTbTH9HAIQTepmeMHY5y+Xzcl6zqQuMKzMEH1nZUD3mH7fTYGLJ1M4espwVIb8WT0ZMsSRLVbvFK1chY+yDcLy4f53N2HR5qYuHQPAdW6+5ojxa4p7P57U4TUFOVfcvqeDFulNcE4fTc9eMbb/9F8r8aHLcsCSYtdFKIroCyHeFELIX94iAONz7U9EYwBUCiEWCeNp9ziA8/u4mwzTb1G9mHFNt8XyVU6aUYPzDx0HwG7px5I6Ii4WvuRRlyxluSxv0GeP6cuH3n83NOKJhdus/W949lNc99THAIxpZP/6ZKf1IK+Roh+2W/qaTbQzs/dTQljXdbPc8rX040kjyVHuH/AaVQ6TegovLd+J6T99DY8t2AIhBJKptKX/wsd1uOmfK12vl891JZsaOrosooDxXQfN4kzZrhOxijBlEX2HaGWbp//e+gZbfLyrovnr1z/DJQ8u6tIxQFrs7Yl8KduALmUuyAOkq0+qx7qf1358V5HTZDPyOlJpS7+xI44nF23H1x5ZnPU82So/For+kL3/dQCvKe8nE9EnRPQeEcmC4+MA1Cn71JltDDMkUR9ZbVHNJvpqdv/I8qBVX+Dtz/Za7dFOLP3XV+/OaIsldcS1FEJ+j+neN4v9qIV8XBPbUrjgvg/x/X8sVyx9I8+hxWHpJ7PEcaXoG1MPdXNfF9FXZh3saI4goaXw7rq9eGxBehCjpwQSegrhuGaJhM9L8Hs9SOop/OOjHUhoKaytb4NuxpdlLPutNXts3gy3EENn7v2dLVGc/Lv3cNdr7tMOnewLJ3Dy797Fhj3tiCWNz9/npazXaTfj8NFECq2RJCbdOM/W5wxvhe7+O7ji0SXWmhHGfRWmiI+1sp6w/xacXhw5+ItrKaUaYg5LX/ltdGeVPjmN1jlt0xq8EbB+t+G6zzVzotjFkPpM9IloPhGtcvk7T9nnZgAagKfMpnoAE4UQhwG4AcDTRFSZefZOr30NES0loqUNDQ29cTsM069Qn1m7W2O2B8kEpU7/yIogpo8uxwWH2cfI8WQqp6XvRjSRtvTLgj5r8R71Qevm7n5rzR7rteZw7+9sieL2eWssV7QqLMksU/bCpmi5u/fT+934z5X4wT+W48rHPsIv/73GapeDlY5Y2tL3eTym6KuehPRc8fKg32qzTR/sxpQ96d34cFN+ru/31jdgU0MYf/zPBsTM4ki+HJZ+mzkbI6bp2NzYAQB4WPHcOC3VeJ4i1JVEuJ4kM1oLMikiraVStn7qKWENBONaus5ErkQ+NXu/O/2Tln4k7hR9aekT1pvljytC9sW6VIrt3u+zVEwhxCm5thPRlQDOAXCy6bKHECIOIG6+XkZEmwBMB7AT9hDAeLMt27UfBPAgAMydO7cw2ScMU0BkxnBF0GdL3gOAmbUVqG+JIpzQMaIsAJ/Xg99ed
Ahe/CT9Xyam6V0ubRtJ6miNJlFV6rcWAKrbF3WIvulaVh5sy+tarNfSKqoI+eD3Et5aswdr6ttw4vQa6g40RgAAIABJREFUnDCtxp6xrTyk1b7KDHo3Kzuh2f+7z3PkFADpWHd7PB3T95uWfkJPWQmKsaRuXaM8lH5UailhTTXsTiKfXBY5X8tZehlaIkkIISsyerKKsJymGUvo1kwIdWZjPjMQ3OhKIlxPhE1a4apHIumw9HWz9j5gDFrysvS7WLjJifzewg5PSbruRAKPmB6lXN9tsUW/WNn7ZwD4MYBzhRARpb2GiLzm6ykwEvY2CyHqAbQR0dFm1v7lAF4qQtcZpl8gn1nVZZkWxYzRFaguNdzncmVAr4dsddiNRL6uWfoN7XHEtRRGlgew3whjvYBtTWG76OspvPhJHWb+7HWrbc2u9JK70jrzeT2oLg1gZ4sxv1oKkT1xLDORD4BVV8BZHvjWV9Zg6dbsZVZl3FfOS1dj+j6vBwEvIamlrPPGkrplyVeG7PaRHBi4ucbztSLzzfKXCWQyhBPyZ4/px5J6uv9meWbAvta7M3Ev2xoI2fr75KJtmHTjvJyx6R6JfkqKfvocmp6y9VMXwhrsxrV0saZcn719QNkdS9+QS6eHTA5k56/dix3Nxu+5JZLMmkg5aN37nfBnABUA3nJMzTsRwAoiWg7geQDXCiHk/+JvA3gYwEYAm2DPA2CYIUl1SWYNgOm1FVbJYPkvADx25RFY+ytj0ks0keqypV+3zxifjygLYpJZxGZrYyTDvf/CMrsTbm19WvTllDefh1Bd4rfmgYddCs6oAwB1gCL3Va3sX/17DR75YAseX5ROJHSS0FO4952NOO6utwEYAydpFfs8BL/PiOnHLW9FyuqPsx6AFP2k1rml//yyOixRE+K0/Kc4Gv0w+iNFP+iTMf1M8Wi0zdBIW8C51kLKVg7ZiRwA/d9bRtVCt6JO1rXzPKcb6boP9pkcquirg5snFm3DT/+1CkDuWH3SkQjYVeTgy5kL46zqeMOp0wEYC/64Ueyyx0UpziOEmJql/QUAL2TZthTAgX3ZL4YZKEgLblhZpujPGF2BwyZWY+XOVlsxFa+HUBIw5thHkhrC8a4VHJJVz0aUBzC8LIASvxe/emWNbZ+EOaVPpVEpCiTXAvB6CBWK9SxdueqDuS2WxIm/eQd3f/lg7GqJWe3SUlVF4CPTwvfmKN8Riet4bMFWW9u+sCH6cmVFVVyiirAEvMY0RdX1D2QrzmNv+9/njII6sgiL9A7km70ftUTf+ByDOSz97UoZ5mhCt/orp5O5rdroZum77Zd0xM1zech7w9K3X9tu6bc5CkrJ32auiIk6SOxOTF+G1JwWvPp9j6kKYe4kY3nt+taYtZiWitPSdy7i1Nf0h+x9hmG6yEVzJwAALjQT9DwEXHnsJABGIt/NZx+AB742B4dNHJZxbHnQh46YlvHwevqbR+GJbxzper2R5UHrwTqyPAgicp3fndBSCPiyP1akS9bnJZQryU4dcSmE6YfxyrpWbG+O4I5X16JuXySjBLI6QJDz/bPV8wcMb4HP8XCVxYF8XjKmISpTw2JJ3VbHXq3KaFn63cjel3kH+bqYnSIhV1l0O77OdC9PGlGKmKZb4itv223Ghpub3jVXwbxXaSXnivHnmxzohpu1bmTvp/u5z1HUyTo2p3u/Z4l81hLLzpi+8ln5vGR519x+i39dsMXm+QL6foEmJyz6DDMAOXBcFbbedTYOHl8FwHDj/+KLs7D+tjPh9RCCPi/OOLDW9djyoA972uJ4ZYU9yW3upOG2ZYBVJgxPrx8gM+//9JXDMvZLOGKvzmPaLUvfg4pgpqWvuqyl+O5piyOpC0x1WE1WjXkh0GqKfq5CM5GEnlGrXw4W/B4P/GZpYSlYcS3t3veblr5EiqkUvvHD0p9PZ67jfOLPKk6rOVdMf3tzBF4PYfLIMmO2hfldyJi+sy4C4G7pu7mg5UBAWO9zJKv1w
IXtdl+akmAJAM1h98GdKqDOVQZtiYDdSOST13dm76sDJL/HY13X7XO95d9rMtoKNRVSwqLPMAMYmbAnre9cVrakPOjD/LV7sG6PvRxowOfJWPxGotakl5bMuYeMxcVz7XW1krqwxZXloiJy0CDd+z4P2UIPHY44PVH69e42w7XvtPRtbvg8XOXhuGZlYEv22Sx9ozhPXLH05TX8XnK19OV2dYXEziw3KZZ5W/oOAZXZ+5/tbs+oeLi9OYKx1SGUh/yIaymrrr4UfXUVRiC9nkLGNV0GT1Kc8rH0u+Pe/++GBjSHE665DsmUsHkPmsPuxajUrPyyoD18pfXQvW+tTJmRvZ/ulwwTAV1PkCwULPoMM4CRWeUySz8fVLE9xPQUfH6GkdnvdH+ff+hY/PGSQ1FiCl5Vid82sBhbnbmCoBRpAJg91jj/MHNw0hE3LDQPOWL6juz9EpcFjqaNtov+799aj2XbmjMK/GTDzdKXx8riPAkle7++NYZz7vkAQHZLP2EKifo5dCYolujnEM3WaBJPLNwKIUSGe19a+gCsioeS7c0RTBxeihK/x7T0jX7KVIetTZmi7yZObu55KZry9pzTIwHgHXMZZnXGRj7ENR2XPbIEVz22JKulr4qtzMVwLgCkHiuLUlnn6ME8fSGEFQZxJsCqou3zkvU7yXcqZF+uyugGr7LHMAMYn9eDipDPtlJdZ8g552UBL/71neOwtcmwDgFkeAr+vzNnYkxViVVgZ4RjxUA30d/TlrbCZo01amtJEW9XLX1F9DsSGrY0hvGqabnWVASxTREoD8GaJijZ3BjGl+5fiFevPwH5EI5r8Hvs99diLibkN4vzONcCkOSK6Qe8HoxSBl2qCLi5+uVAIZzQMX/NHpwya3TGPj9/aRVeWr4LM2orXd37zsGZpKE9jikjhyPk9zpi+sb+25rDZrlhow+lAZ+rpe8W5//+P5Zjc0OHspRy5nHPL0svw9wV5KBv494OV3d3Uhc2t3qz6aEJ+DxmZRcDdSDlHBAkemDpa8rqj86Khuo1/V6P9X/IOZhyS450Hl8I2NJnmAHOjWfOtBbVyQf5MKwuDYDIiP8GfYagOcVExielaE8ZaRfe46bal6aVXPf5/fHfH5+Ei+aMx8/PmYVvn2QsEWzF9L129344ruHyRxfj70t24JQDRuOgcVW285X4vShzWfYWSAu3cy69E9eYfthu6cvpgM4ldv2KBQcoMX0tBb+XcOZBY6xtuiIubmu4q1X8vvn4UqvSoa1f5uDjHx/twCtKCV3AcO+r96GKb1xLIej3osTvNc8rY/rG9m2NEauwkrxPd/e+uxCtqW+3svbdLFn5+XU1G12Gd/w+j2vhHC3ltPRN0c8Rt89073e/OI/6GTlXWlQtdVnkCcgcFMm+/ej0Gbblttm9zzBMl7j0qP0wZ7/hee9fZoptZUlmYR+nKMoYv3wwOWcDjKsuwda7zs44br/hpZgwvBQ+rwdfP36ylQcgp1oFTA+FJBLXrcImd1xwIMocrtmg32sNTJzUm9P5pNch25rv4URmTF9m7xsWGlkP9CrHZ+PLYukndGO2wqETqrH5jrMwrrrE9hB3X7XP3vbmmt2Y+bPX8fqq9HoHcvD1wsd12NUas+0f8ntt96FOXzMW5PEg6DfEPL1QkbF9a1MYkxSPiSH6uTP61VmQcU1PW/ou9yZFP1thmmxIl3nA615pUNOFza3eHEnA66GM+gNqUl2Z43egJnl2VWjVZYqdWfk2974nu6UvP5MSv9cq9AMU3r3Pos8wQwwpttUuol9dGsBvLzoE3zEtc2nhrzMXEjl0QrXrOZ2WU3Wp/dylfuOa9aaAVZX4rXr2gGE9eQj4zkn7Y1RlCKUOKy3o89jCASpy+dbaqpB1bjcicd0mCl4PWVPYfB7DQssm+oEsMf1oQrcsO4+HzKz69MPebeqb00K+5eXVAIAVSrli5yBK9Tw4Lf02pUiOrJMgvSLSIk6YK9HtaI7Y1mYoD
fjcs/cVS1/1/sSTKSt7383Sl1Mvm11mCeTCsvS9Hvd5+rpAR1yzBiBCGN9RriJAzsGfOhDpqntfDvJCfo81U0RiS+Qzp1MSZX4+ctBSGvDC71M8NQXO3ueYPsMMMeTD0GkJSb48x8jI/9HpM622qaPKsaa+zZoi6MTpLVUFHQBCAUMYZZWy6lI/msPp6zeF40iJ9HEZlr7Pk9WCl/HjMVUl1rlleV+VcEKzudGrS/xoCifMCnceK5EPACpDTkvfnr3/4+dXYEVdCxZvabYlGPo8BFXnVas+qads15BIV776ffgdHonaqhA2N4QByJh+egAiLX0j2cwUffNcsqBPQk+hLaYhnNBtMw1KTPe+EMIq4APYs++9nnQOQEy19F0GNNLSz1UvwQ0ZJw/4PBlWuIcMYY3ENVQEfdYgp6rEjz1tsYxzSZyJfKqnoKs6K0V/VEUIO/ZFbAV17FP2CETpdRzcrl8S8NpW4evJ4kTdgS19hhliSPF0Cksu7rzwILz+/RNyrh5mu4bDKg94DetUWvrVpQHbPjL5r9y08J0DkqDPm3U64ubGMII+D4aZ3gWnYEsiCd1m7cnrS6tePX9lif36zux9AHhy0XZsb47giEnp0IrT0lcFPmrN7Xd/yMuSwMZ57NdSP/egz2Orpf/U4m1oaI9b5w340qGTJnNqW0JLYZc5EFKTL6UHwdknNYatVjmMJ1PWAM8tkU+Kvlo8R42lf7qjBV/43btojyVdj/N7KWMOfYnfC00XCCd0VIT8lrVfGfLlrMvgzMuwu/e7pvoy3DGqIggh7CWInYl8ABB0Gdyp7n01obQrCxn1Biz6DDPEkKLvdCHnoizow8za/Fe5Lne454kIJWac2eshlAW8tuI81nFyZoHTve/P/agaXRmyHvIhZV/76nKa7cFf7shtUJPCnPkOAa8nax+OdIh+QkvhO099jI+2NtusPTln3u0hX1Xix2f17Zi3oh5CCPgd342a+R3ye22i9ezSOvzP3z+2hOn/b+/M4+Sqqjz+O7W82npf00l3OumkE8hKFrIRIIsQiEAQCCPyYRMJMoDIAAoCLjgqo6M4Ljg6gIrD4qCMBHSUsIgshhiEQBBIAtn3pLN0uju9VN3549376r5br6qrk3RXd+p8P5/6dL1b7726dbu7zj17KOB3LCWqZkJHV8KxstSUJAswKW3Y9Oubmr4zrp1nfo7fvbkVe6VZX0+j7IgnsHZnM77/3BosXbUNH+1uwQ4jTkE1XAr6fa5ASACIWAF0xBNo7ehC1EpmLhRFgrh8Rj3SUWWkseobvkyBfJubWlPcEyqFsarIvqduyTBT9gA4hZ50lDUjagVc5n1O2WMYpldRX0zp0r6OBaZ5H7CF1aH2LpREgiAiRL2EvrzONM0qLdssP1ssm/aMqIw55nddSFkBn+OfTif0laavWz5SA/koJZDwljNG4d1tBzG5vtR13pZ9bXh/RzOW/WMnlt54ivNaq4fQv3hqLW6Y24jLHnodL63ZjZfW7Mbn5o1M2ZCZQtgMlNu2/3CyT4Bm3t+rmfdV/4LBxamavimgdJ9+OrO/sgY88PJH+Nffv4d0dHQlcPdTq7H8oybnb87MDmjJYN6PWn7ZZQ+IhgKOu6E4EsRXzh2LqqIwvvOnD1zX3LZgNK6ePRzflc2BANOnn3a6uObhlZg0tATfumBCymetKrQ3TC6hrwlttXG0/L6UTVFrp7d5nyvyMQzTq6gvVdOEfCwxNXUgKWCKpRm+0CMwT11npucpgRs2ivaoKn0jKgucjYIuMHXT9M6D7S7hpt5fpfrpX8Sm0A96aPpzRlfhPy+b4pqT3+dzihOZJYmdRkGaMBhVXYih5VHXWvzx3WQUv8IUkq2GWVtPvdPN+2qjoTT9gI9chZzUOptpe7pw17Vi/bxOqcFnEviA/XlVbIH62zOrDLo0fS+hnxBoae9CQcjv/E4Lw0H4fJRixgeAQUXhlL+V1izN+00tHdjdbGj6cr5q7dyav
hb0KDeOloem36YH8nH0PsMwfYXStkwT/LHEDMQDkkJfZQ0UhYP4y21z8d3FE51zCpWmn+LT97l+mvccUVWAiAwW1IW+CrYqjQaxbvchAMDiKbX4n2tnemj6yXubsQuW34ewoelXFKZ2OAz4yG3a1r7439lyAIC7Ha+q2V+oWUY2NbVmFMJA0lXgvK+fnPfSA/kU7V0JbN9/GNVFYdf6ROTvyRRQrvfXZJI+j81NbTjjvr+4rrvm1OEphaI6uhIpVRPNz6N8+mt2NuOmx990vRax/OiMJ9DSEUfUCjjzV797r9LRZmomAOzVykNnUq7buxIpBXgc874U+qo2BGCa9+25BP2UdfR+X+fps3mfYfKMcyYMxgc7m3H9XM8O18cEr+IsDZUxvL+j2ekXAABDy6PYpvUd16sF6igt2xT6SiusKgw5aV+6BUMJiBGVBVgpo/wn1JVg2vAyLF21FYDm09fubbo+An5K0fTLY6lVEM3Wvpv3JasKfuG3byNi+dERj6MwFMA3LhiPBWPtpki6pn+4M5GSfdDWGceT/zwL63Ydco519jR3OELG8sh06OiKY1dzO6qL3HNWlpVMmwxdJOnnbdjTApOSqOVsvpLvnXDqIeifUUeZ971KKkeCdqng1o4uxCx/itC3PAS8V7yKvqaZGu4c7oyndCNU81NBkN2a9wP+lDLFbfIeEcudfcGBfAzD9CpWwIc7zj4xbZR7bzFOVtkzv471gCslhFJ9+vZ4yDDZ3n3OGEwfXoYZDeXOl2/Aw7zfUJksSKNqDyhrhFoH3advaspBD03fK5vAFDart7pr0P/38o3o7BKIhvw4b+Jgx1+uLAsqnW6t0QypvTOByUNLcbFsqWwK/d2H2h3zcSjg9yhBm0Dz4c6UAEW1Fik+fc38rgcR6rJS36wpwkG/U5NBsa+1I0WbTafpexHw+9CZEGhpjyMaCjj3iihN38NNZY6VRINO9gIAxBMJxBPCNQYk0x71+Xx16bt49PVNALyFvi601d+eFfDhufd2YuLXnnVeS2r6AU7ZYxjm+EeV1t2yz/1FW6W181Vm7pTo/TTm/RNrivDra2ciFgo42puezubTNH2F08xHnqaEh/5FPG1YGb5zUTKQK+ih6XthmpWVZq5YsaEJh9q7XO8FJLXWSbIroV5wB0jVTJWA/+Elk3D93BGIJ4STDmkFfAgHfa7MhY6uBJrbu1I2A2pzo/eqB9wNd9KJJC9NPxL0I2xYaXYdTO2IlyL0M1TwC/oIXfGkpq82KOr36GXKNy01g4rCrrTEeAL41h/ew6x7X3BF6itrSasm9H/x2ga8vr4JgN17wvL70kbvBwMqkM9+f/28Vi1lz9KL87BPn2GY4xHVcW+wljIGuCunqXQ7U9NXQVmmpq8zqroQADBrRLkzpjR9l9CX5mfVDEdtEnRBHAr6sFhq1YAdwT64JNJt62Kl6Y+pKUIo4MOHu91CXwhgy/62lPuoYLCJtSUwPAQ4d+JgPPHZma6xqcPsjIHpDWUYP8TeKKi2uaGAD0TkiqtICOBAa2dK8KTa8Jhd9XRNN116m7kxse/nQ0T+DgfJzZxXAR2zkl5mTZ+keT+OWCjZIChp3vewuBgbAbMxVDwh8OSbtntH9/Urt4NZX18RswIojQXRdCi5UdA19aCm6Ss64wl0xhNYs7PZqaboKsPLFfkYhjkeKYtZePSa6TgxQ76/Mnen0/TDGYTuSXUleP1L81FdFMYtT6wCkBTCQ8uTpWeVKV1ZZZXM0L+olSAJBXyOkJkzqhIrvjQf7+9oTiv8lYY5qDiMjnjCpemXxyzsbenA1n1troYrQNJXXhwJojgSdPm27zlvLEpj7vO/fM5YXHXKcFQVhp3c8S0yfkDNrVkKrqFlUWxqasXelo4UTT8qN1Hthl95U1MrhpVHsWFva0q1xUxEgsl0tPryKHYcPIxdze2uzw+4a9kDcHXQMwlqmrW+kVFBiJ6BfKamX+zeaMYTwqlkqHdW1Nvnm
l3xIrKl8aCisKt9tMu8r6XsKZpaOnDDo3/H3zYkOw+yeZ9hmLxg1oiKFAHmhek/d8z7UkjdeuYoPHPj7JTrqovcX+5nyLa1lQUhvHTbHNx/6WRMkU2DEoY7wDI0fQBYdvPp+OllUwDYG5KSqIUZDeWYbDQeUqgv8BGVMdRrNe5/ftXJ+PlVJwOwC+aY5v0Oj3Q7hVe5ZCvgc6wX5XI9lXnfdIGMqUlussz6Cerepqa/fk8LRg+yLSc9EUnhoN+pVlcvN1pK6OuC1zTvHzycvmxv1PI7mwW9J4PasHhVljRjKwabQl8Ixyyvm/fVOnQlhJ1y6eraF3A+h15cyN1lz+f6CQCvrN3jEvgA2LzPMMzA5sZ5I1PM0j1hYl2JywdtRv8rYX/2ODva/ezxNU5gYCbu+viJ+Osd81Aas1BfHsPC8TXOva+bMwKnj6p0eg3oX9RqAzC0POpE2GfDmp22Zj95aKnLujC2pshlYjYF1WmNlQDsGAU9fc8K+Lp1KZTLFLm0Qn9wUuibGwqVXqcaKgG2qX1XcztGS3dJT6R+JOh3tPJ62c1PmfdrtKJAZvT+jm5q6KvNlFvTT43FUJiBfPp7A+7ARb1ksF6ZsKU97pqnsj7VFEfcQl837/tTzfsqWPCnl03BsptPS5lfVx9H77N5n2GYo+aWM0fjljNHY9jtvz+i65+8blaKOVVHCbJPnlyHheNqnAI/6fjs6SNQWRhCwO9L+cJXVBeF8ctPT3OOyzWTu5fJOBtUWtikoaWuFLGg34fiSNCpKGgKqsVTa/GxMdUoi1muuv+O4M1AzLL7Eqgyu5bfbSUZpFk/zJ4Ig4rDOG1UJX61fCNmN1ZgSn0pNuy1A/ROqCnC5TPrcf6kIbjg/tey+fiIWH4nPW+otHTsdjT9ZJaGrum3tHd5puo5n0/X7rUgwWT0fveavp69AdjuC4Wu6etCvqW9y7WBUhuOQcVhNLd34ZAMjHR12XNS9pLXqQ1NbWkEjfL36a7Ix5o+wzADmCtmpq+Hng6/j1IE7Yo75+MTk4YASH6JElG3Ah8Abj/7BFw9e3iP5jCjoRzLbj7N0caOhLFSqx5UHHZtIoIBH3w+cjRrU3snIpRJM72KObhk2lA87eHCMCEiVMQsp+qdck18+ZwxuHLWMNd7qX4HulXmpvmN2HOoHdc8vBKAbdoHgGHlMdyzaFxaV4YXkaAf+x1N3xb6Ow8eRkEo4Cp4pKcEKk14SIn35kwP6tRjEtQGIOhhCTEtKebGb9PeZObBvpY0mn6Hu2yzer8a6SpQ2r5u3ncq8ml/y+o8/XO4a++zps8wzABlw70fP2b3qioMO9H8vqPxHfSAxiw060w8vmSGk05XphXvUUKguiiEHQcPe5qkFcoEb5rpM1FWYGGbStmT9/603PT84Z3tznlK01928+lYvdWuEDilvhTXzx2BH7/4IeIJ4dTrNwv5eBG1/K6WteGgHw9eMRWPLN/kCNp9rZ2oLY0kUyUBtHUkBd0WKfTry6OeLZH1Qk16pUZ1v6BHnr6p6ZvHm7W00QdeWY/a0giuPGW4K7ahpT0O0qpKKLeQihvZceAwRlYVuDYK6vequ6eU20X/HK4yvKzpMwzD2CiLfx/J/KOmMBx06g6UaZUHleapBGGmtsaqWFBPhL5eHdCsJ1CiWUaUtj2yqgDnSyuKfv3Btk5XEZnuqCuNuo4jlh/zTqjGg1ee7LQ6VnPQhb6Xpj+swm2CV+iCPuZl3vfM0/cZx+TKmFDpjUpr/6+X1zuFeRQPvbIeC76fLDOs3E/KIrGpqRUdXQlXIJ76veoxA8q8H9GFPpv3GYZhUjEj7AcSZZqQUamITvGdtvR56YUeDYC6Q3clmHnrelyAmbKnUBuDA22daOvoApG7RXE6VO8AhS7YA36f08yoNGq5ivYc7ojj0geW4zdvbMHWfW3w+yjlXopoGk1fbUq81snU7H0+wo8/NdmVUVBRYDla+Nb9bfhgZ7Mr1uD3moUEsGsdq
M9cHrOwcmNTSo1+VQJaF/oqZiCdef/ltbuduIe+gIU+wzD9FkfTz+00jghd01ecPLwMAPDW5v1pr1OFiDp7ULRFpe35KDUIsVxrgOPV2RBINh3aLzX9SNDvaqmbjroyt6ZvdrZTcQrFkaCrxsL6PS14dd1e3PrEKmze14aqwlBKmqZCj9h3afoZUvbM4D6/jzC9oRxLr0/GSNQUR5wujQDw6rq9KT0IdFQGARFh2vAyrFjflFJJUL2tWdZYFeVRjKgscDY5r67bi6fe2pr2fY81LPQZhum3KMPnAFT0XeZchSpFbEbR6yizvik4MqFavnZnKe5O09/f2oG2zrhnu1ovdO3cMgSbfV9b6JdGLdd6fKSV8H161TaURq20qYkuTV/bAKj3ykbTV5sA3eo/uCSMx5fMwPO3nA4f2QF97V3piwTp2SXTh5dhy742vPj+Ltc5yiJldtgz13PB2EF45YvznGOzeFBvwkKfYZh+S9KnPwClvgdBvw+PfmY6nrh2ZtpzlNDPpHWajM5Q5RAAJtTam410wrw4Ygtn27wfT9HYFUE/4ZufGO8c6xH3EY9rlKZv+vS9zvMqpwski+JYfu+aBV4+fXMjoISxvhmoKY6goiCEEZUFKAgFcKi9K6V+gI6+n1p00hA0VMRw1+9W23MLqIBT+3Vz89BdfIRZVKo3yZnQJ6KvE9HbRPQWET1LRIPlOBHRD4honXx9snbNFUS0Vj6uyNXcGYbpG4bIOv3lWVTxGyjMGlmRNmgNSHYU7ImmP76bQkWPfGY6/u+mU9NunpR5/4A075ubg29fOAGXzajH2m8sxKemD3XGJ9eX4tTGCjRUxjyFemlUCX0r5XV9w1ASDXq2YwaSG5VoKM1GpAfR+/p4ZaHu9gii+XBXRk1fn29pzMIFk5OBkOqzqfXtlG111bp2ZzkZlA9CH8B3hBAThBAnAXgGwJfl+NkAGuVjCYCfAAARlQH4CoDpAKYB+AoRZZ9AyjDMgOPG+Y24/9LJmHdCVa6n0meuPDd8AAAMjElEQVSo6PueaPpl3WyKCsNBnFiT3hrg+PRbO9HaGXfq2isuPrkOXz9/XMp11UVh/Orq6RhcHPF0Z6gI/tJoMKVZ0vSGMlegn1djH6Kkph9Loy175embPn116Nc2PXp8QGE4gObDnY6mb25Q7lk0FvdeOME1VqzFbKigR3V3FbCp6jZ0J/T1DUhvk7M8fSGE3mg6hqT1ZBGAh4XtQFlORCVEVANgDoBlQogmACCiZQDOAvBY382aYZi+JOj3YeH4mlxP44i5c+GJTiOcbJlUZ+sy507o2ecuDAWcJjs9xQr4ELX8TvR+NIMpHgCW3zHf1YmusjDkaZkozWDeXzylDm9s3IeDh7tQGrOcroc6PqKkpi9/XjdnhJNyB6SpyGeY/MnDvG8W/TnUbmv6fh+hKBJwFeZZPKUuZVNTEkmmJKrPptwIty4YjQun1OLpVdvw2od7M3aHBFIDIHuTnBbnIaJvALgcwAEAc+XwEACbtdO2yLF04173XQLbSoChQ4d6ncIwDNPrXHNaQ4+vGVoePaIiR6/eMS+lcU5PKJHd/Vo74hhUlLnqoRl49tVzx6YErwG6T9/CsIooThtViQlDivHSmt2Y0VDmCOzSaNAzX91HSeGs0vW+eNYJrnO8a+97uwp094buLigMB7DnUAfaOxMIB3zSupBMo/OqmaDXP1CWEXX7oN+HUdWFTgXGnrhqepteNe8T0XNEtNrjsQgAhBB3CiHqADwC4IZj9b5CiJ8JIaYKIaZWVlYeq9syDMP0W4rCwaMyExdFgtjb0o62jrinqT4TxVHv955YW4KGyhgaKmKIWgE8/OlpuHXBaDx942wQkaMZl8WS5v0Ta4pw24LRAGwh7fcRQgEfCtL49E3/fboxE91dUBAO2oF8XXGEgv4UV4JXvEFJJGnej6SpHKnqJ7R1pI8V6Gt6VegLIT4mhBjn8XjKOPURABfK51sB1Gmv1cqxdOMMw
zDMUTJ9eBleXrsHH+1pyTplrzvGDC7CC7fMcVL3TJRwLolaTqbGjIYyp/Oh8sHHQoGsKgQqzIp8XkRTfPpdaO9MICRdHd1RrJn31YbHtAioSoctHd5ulwm1xbh0et9ao3Nm3ieiRiHEWnm4CMD78vlSADcQ0eOwg/YOCCG2E9GfAHxTC947E8AdfTpphmGY45Qb5zfi0RWbEEd2JXiPBUozLgoHsHhqLd7ZegCfm9eIuFOJ0T6vosByTOXZoDYTQ0oinvX8AfdnLAzZgXytMl0xXT0DHb3x070XTMCE2k2Y0VDuOqeyMLOmv/SG7hsqHWty6dO/l4hGA0gA2Ajgs3L8DwAWAlgHoBXAVQAghGgioq8D+Js87x4V1McwDMMcHRUFIdSVRvHRnpYem/ePlJFVBfjH9oMokJr8vy+eCCDZ+U5tCh684uRuBfHU+lKs3LjPNfbMjbOxs/mw5/m6T78gFEB7VwKvr2/CtOGlTjndTBRq8ymNWbh+7siUc1TTJT0oMNfkMnr/wjTjAsD1aV57CMBDvTkvhmGYfKW6KGyb9/somvxbF4zHwvGDUrobKte48qWb5X5N3rz7DEQsP8770StYs/OQM14as5wMAhPdb6/KE+851I7ZIyvx9pb0ZZIV6eoK6JREgiiPWSnBh7mEW+syDMMwAIAK6ZvuK00/FgrgrHGpqYmqpW1NluVplWB/4tpZ2H7Q25xv4tL0w0lT/amNFfhwt71xWHJaA646ZVhW9/PC5yO8cfcZR3x9b8BCn2EYhgHQfyofFkeD+O7iiZjdWNHj63RfeyZ0a4ZyHVgBH+rKok7hniElEacd8vECC32GYRgGAJye83ulTz2XXCgj+HsLvRuhJVvdXjVrGIBkTYDuLB7fvmiCuyj/AICFPsMwDAMAGCtr+GdrVj9eOH1UFX54ySScPW4QgGTp30xNggDg4ql1GV/vj7DQZxiGYQAAc0dX4bfXzXRKAecLfh/h3ImDnWNl3u9O6A9EWOgzDMMwDlPqy3I9hZzjlP7to4DGviSXXfYYhmEYpt/RWF2A8piF+gwtkAcqrOkzDMMwjMaIyoJ+l2p3rGBNn2EYhskr+ktqYi5gTZ9hGIbJG9792oKsuvAdr7DQZxiGYfKGWBbNdI5n2LzPMAzDMHkCC32GYRiGyRNY6DMMwzBMnsBCn2EYhmHyBBb6DMMwDJMnsNBnGIZhmDyBhT7DMAzD5Aks9BmGYRgmT2ChzzAMwzB5Agt9hmEYhskTSAiR6zn0KkS0G8DGY3jLCgB7juH98gleu6OD1+/I4bU7Onj9jpxcrF29EKLS64XjXugfa4hopRBiaq7nMRDhtTs6eP2OHF67o4PX78jpb2vH5n2GYRiGyRNY6DMMwzBMnsBCv+f8LNcTGMDw2h0dvH5HDq/d0cHrd+T0q7Vjnz7DMAzD5Ams6TMMwzBMnsBCP0uI6Cwi+oCI1hHR7bmeT3+EiB4iol1EtFobKyOiZUS0Vv4sleNERD+Q6/k2EU3O3cxzDxHVEdGLRPQPInqXiG6S47x+3UBEYSJaQUSr5Np9TY4PJ6LX5Rr9mogsOR6Sx+vk68NyOf/+AhH5iehNInpGHvP6ZQERbSCid4joLSJaKcf67f8tC/0sICI/gB8DOBvAGACXENGY3M6qX/ILAGcZY7cDeF4I0QjgeXkM2GvZKB9LAPykj+bYX+kCcIsQYgyAGQCul39jvH7d0w5gnhBiIoCTAJxFRDMA/BuA+4QQIwHsA3C1PP9qAPvk+H3yPAa4CcB72jGvX/bMFUKcpKXm9dv/Wxb62TENwDohxEdCiA4AjwNYlOM59TuEEH8B0GQMLwLwS/n8lwDO18YfFjbLAZQQUU3fzLT/IYTYLoT4u3zeDPvLdwh4/bpFrsEheRiUDwFgHoDfyHFz7dSa/gbAfCKiPppuv4SIagF8HMAD8pjA63c09Nv/Wxb62TEEwGbteIscY7qnWgixXT7fAaBaPuc1TYM0l04C8Dp4/
bJCmqbfArALwDIAHwLYL4Tokqfo6+OsnXz9AIDyvp1xv+P7AL4AICGPy8Hrly0CwLNE9AYRLZFj/fb/NtCXb8bkN0IIQUScLpIBIioA8FsAnxdCHNQVKF6/9Agh4gBOIqISAP8L4IQcT2nAQETnANglhHiDiObkej4DkNlCiK1EVAVgGRG9r7/Y3/5vWdPPjq0A6rTjWjnGdM9OZb6SP3fJcV5TAyIKwhb4jwghnpTDvH49QAixH8CLAGbCNp0qxUZfH2ft5OvFAPb28VT7E6cAOI+INsB2Xc4D8B/g9csKIcRW+XMX7A3nNPTj/1sW+tnxNwCNMprVAvBJAEtzPKeBwlIAV8jnVwB4Shu/XEazzgBwQDOH5R3SJ/oggPeEEN/TXuL16wYiqpQaPogoAuAM2DERLwK4SJ5mrp1a04sAvCDyuGCJEOIOIUStEGIY7O+2F4QQl4LXr1uIKEZEheo5gDMBrEZ//r8VQvAjiweAhQDWwPYV3pnr+fTHB4DHAGwH0AnbV3U1bF/f8wDWAngOQJk8l2BnRHwI4B0AU3M9/xyv3WzYvsG3AbwlHwt5/bJauwkA3pRrtxrAl+V4A4AVANYBeAJASI6H5fE6+XpDrj9Df3kAmAPgGV6/rNerAcAq+XhXyYb+/H/LFfkYhmEYJk9g8z7DMAzD5Aks9BmGYRgmT2ChzzAMwzB5Agt9hmEYhskTWOgzDMMwTJ7AQp9hmB5BRHfKbnZvy85i04no80QUzfXcGIbJDKfsMQyTNUQ0E8D3AMwRQrQTUQUAC8BrsHOO9+R0ggzDZIQ1fYZhekINgD1CiHYAkEL+IgCDAbxIRC8CABGdSUR/JaK/E9ETsqeA6j3+bdl/fAURjczVB2GYfISFPsMwPeFZAHVEtIaI7iei04UQPwCwDXZP8blS+78LwMeEEJMBrATwL9o9DgghxgP4EezubgzD9BHcZY9hmKwRQhwioikATgUwF8Cvieh247QZAMYAeFV2CbQA/FV7/THt5329O2OGYXRY6DMM0yOE3cb2zwD+TETvINlYREEAlgkhLkl3izTPGYbpZdi8zzBM1hDRaCJq1IZOArARQDOAQjm2HMApyl8vO5GN0q75J+2nbgFgGKaXYU2fYZieUADgh7KVbRfsTmtLAFwC4I9EtE369a8E8BgRheR1d8HuUgkApUT0NoB2eR3DMH0Ep+wxDNNnENEGcGofw+QMNu8zDMMwTJ7Amj7DMAzD5Ams6TMMwzBMnsBCn2EYhmHyBBb6DMMwDJMnsNBnGIZhmDyBhT7DMAzD5Aks9BmGYRgmT/h/s/11r0zmz84AAAAASUVORK5CYII=\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Advanced optimization with transparency\n" + ], + "metadata": { + "id": "sKN4xD6Cz-xL" + } + }, + { + "cell_type": "markdown", + "source": [ + "While the simple optimization above using `opt.loss.ChannelActivation` works for optimizing the alpha channel, we can do better in a variety of ways. 
For example, using `NaturalImage` as a target means that we miss out on the random image transforms that can improve visualization quality.\n", + "\n", + "Below we define a special loss objective for optimizing our alpha channel, using transform robustness. We also add a `CenterCrop()` transform to encourage the visualization to avoid the edges of the image." + ], + "metadata": { + "id": "Dmpiqunk_LmO" + } + }, + { + "cell_type": "code", + "source": [ + "@opt.loss.loss_wrapper\n", + "class AlphaChannelLoss(opt.loss.BaseLoss):\n", + " \"\"\"\n", + " Optimize the alpha channel of an image parameterization.\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " target: torch.nn.Module,\n", + " crop_size: Tuple[int, int],\n", + " scale_list: List[float],\n", + " batch_index: Optional[int] = None,\n", + " ) -> None:\n", + " \"\"\"\n", + " Args:\n", + "\n", + " crop_size (Tuple[int, int]): The desired random crop size to use.\n", + " scale_list (list of float): A list of scale values to randomly select from\n", + " when rescaling the input.\n", + " batch_index (int, optional): The target batch index to use.\n", + " Default: None\n", + " \"\"\"\n", + " opt.loss.BaseLoss.__init__(self, target, batch_index)\n", + " assert len(crop_size) == 2\n", + " self.random_scale = opt.transforms.RandomScale(scale_list)\n", + " self.random_crop = opt.transforms.RandomCrop(crop_size=crop_size)\n", + "\n", + " def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:\n", + " activations = targets_to_values[self.target]\n", + " activations = activations[self.batch_index[0] : self.batch_index[1], :, ...]\n", + " assert activations.dim() == 4\n", + " assert activations.shape[1] == 4\n", + "\n", + " alpha_mean = activations[:, 3:, ...].clone().mean()\n", + "\n", + " # Randomly scale the image and then randomly crop it\n", + " scaled_alpha = self.random_scale(activations[:, 3:, ...].clone())\n", + " cropped_alpha_mean = self.random_crop(scaled_alpha).mean()\n", + 
"\n", + " loss = (1.0 - alpha_mean) * 0.5\n", + " return loss + (1.0 - cropped_alpha_mean)" + ], + "metadata": { + "id": "pc7MGUKM2MqT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we can render the results using the `AlphaChannelLoss()` objective!" + ], + "metadata": { + "id": "mAwfOLftBYck" + } + }, + { + "cell_type": "code", + "source": [ + "image_size = (320, 320)\n", + "crop_size = (150, 150)\n", + "scale_list = [0.6, 0.7, 0.8, 0.9, 1.0, 1.1]\n", + "\n", + "# Initialize NaturalImage with 4 channels\n", + "image = opt.images.NaturalImage(image_size, channels=4).to(device)\n", + "\n", + "# Set optimization target\n", + "loss_fn = opt.loss.ChannelActivation(model.mixed4d.conv_3x3_reduce, channel_index=139)\n", + "\n", + "# Use NaturalImage output as target, for alpha channel loss objective\n", + "loss_fn = loss_fn * AlphaChannelLoss(image, crop_size=crop_size, scale_list=scale_list)\n", + "\n", + "# Setup transforms\n", + "transforms = [\n", + " opt.transforms.TransformationRobustness(),\n", + " # Blend the alpha channel into the image using random backgrounds &\n", + " opt.transforms.BlendAlpha(),\n", + " # Center crop the image to encourage visualizations in the image center\n", + " opt.transforms.CenterCrop(crop_size),\n", + "]\n", + "\n", + "# Render visualization\n", + "img_advanced, history_advanced = visualize(\n", + " model, loss_fn, image, transforms=transforms, n_iter=512\n", + ")\n", + "\n", + "# Show visualization on multiple backgrounds\n", + "# The backgrounds are as follows: No transparency, checkerboard, white, & black\n", + "opt.images.show(create_mosaic(img_advanced), images_per_row=2, figsize=(14, 14))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 824, + "referenced_widgets": [ + "b9b1828c563c4cd184f26fa5590b3f5d", + "03a3658f7c2e499f9528d3376ac6b203", + "6717308b8d6148d9a9c8747164b791b6", + "53a11c21782140afa93165abf2f97e76", + 
"b91e276e9fb24ebb804eb5605707874b", + "6dd3c9c30bb246cdbb364456cd1bf5e8", + "5017968b4ae742d5b8320942b325e707", + "92994846e32f4fd4a079444319362f1a", + "35d3a18dfd08421ba1543031b5fb8cab", + "3952b6f664e94cf8ad7edaf249a17d1b", + "b6e7d16af29a4e43ac54a249e843d973" + ] + }, + "id": "37jeXKau1prg", + "outputId": "b5c05ffc-2eef-40ee-dbf2-c506c12c879b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/512 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The visualization is now nicely centered in the images.\n", + "\n", + "We can also easily visualize the alpha channel as white regions on a black background like this." + ], + "metadata": { + "id": "DNfyVL9K0bHN" + } + }, + { + "cell_type": "code", + "source": [ + "opt.images.show(composite_alpha_only(img_advanced), figsize=(6.5, 6.5))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 384 + }, + "id": "PsCu_Waa0Vwi", + "outputId": "36754300-1af4-4cb6-c416-3ce39454966f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "When we look at the history graph, we can see that the optimization process performed even better with our improved `AlphaChannelLoss()` objective!" + ], + "metadata": { + "id": "Tl9zHwfH-9a-" + } + }, + { + "cell_type": "code", + "source": [ + "# Plot loss vs iterations & previous loss\n", + "plot_loss(\n", + " history=[history_basic, history_advanced],\n", + " title=\"Alpha Channel Optimization\",\n", + " labels=[\"Basic\", \"Advanced\"],\n", + " figsize=(8,5),\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 350 + }, + "id": "tsA90jBb6bLz", + "outputId": "24cfea81-9cd9-4fb7-b865-0150cad4fcb9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Two Stage Optimization with Transparency\n", + "\n", + "In addition to using the `BlendAlpha()` transform for RGBA images, we can also simply cut off and ignore the alpha channel by using the `IgnoreAlpha()` transform. This is useful for example when we want to ignore the alpha channel for the first step of two step optimization, so that the first stage of optimization occurs without the influence of the alpha channel.\n", + "\n", + "We can then perform two stage optimization with transparency like so." + ], + "metadata": { + "id": "WzRHPcVLA0QT" + } + }, + { + "cell_type": "markdown", + "source": [ + "We render stage 1 without the alpha channel using the `IgnoreAlpha()` transform." + ], + "metadata": { + "id": "gg8-vvF7Za9f" + } + }, + { + "cell_type": "code", + "source": [ + "image_size = (112, 112)\n", + "\n", + "# Initialize NaturalImage with 4 channels\n", + "image = opt.images.NaturalImage(image_size, channels=4).to(device)\n", + "\n", + "# Other targets to explore\n", + "# target=model.mixed3a.conv_3x3; channel_index=76\n", + "# target=model.mixed3a.conv_3x3_reduce_relu; channel_index=76 - 64\n", + "# target=model.mixed4d.conv_3x3_reduce; channel_index=139\n", + "\n", + "# Car Tire\n", + "target = model.mixed4b\n", + "channel_index = 373\n", + "\n", + "# Set main optimization target\n", + "loss_fn = opt.loss.NeuronActivation(target, channel_index=channel_index)\n", + "\n", + "# Basic transforms applied to both stages\n", + "basic_transforms = [opt.transforms.TransformationRobustness()]\n", + "\n", + "# Ignore the alpha channel for stage 1\n", + "stage_one_transforms = basic_transforms + [opt.transforms.IgnoreAlpha()]\n", + "\n", + "# Render stage 1 visualization\n", + "image, stage_one_history = visualize(\n", + " model,\n", + " loss_fn,\n", + " image,\n", + " transforms=stage_one_transforms,\n", + " 
n_iter=256,\n", + " return_image_instance=True,\n", + ")\n", + "# Save a copy of the image parameterization in its current state\n", + "stage_one_img = image().clone().detach()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "cee03ddb22f84eefa613c6446234c6c4", + "2bb9a8610f0e4d8b91d054cfe9140801", + "f825760c27ee4b80830654f3c02ae65b", + "fafbc35e64814fa4b13e5da2f643dddd", + "5b9280650f144ff882e0d329ff4cb5bc", + "084a58aa0af344a2b2a3fcafa838811c", + "f1f53143baa94a89817ff46acece5054", + "ddc620d6a2c042789bda344dc94b5017", + "1ed5c534ec334eec8d144f912e6beb23", + "84afeb12ab79493a8aa8e3040323216d", + "0dbfdbf943244faea948bfafc16c4a2f" + ] + }, + "id": "aFPWICceYzqw", + "outputId": "36f0ceb5-7b23-41f5-9801-bf18f72033f6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/256 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Stage 2 Visualization\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "And we can see the loss graph for both stages like so:" + ], + "metadata": { + "id": "nAd9a-flalLt" + } + }, + { + "cell_type": "code", + "source": [ + "# Plot loss vs iterations\n", + "plot_loss([stage_one_history, stage_two_history], labels=[\"Stage 1\", \"Stage 2\"])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 279 + }, + "id": "fqEpq0geqPd5", + "outputId": "cbae9836-3900-4ac8-e2d2-b79f394e2ffe" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Single Stage vs Two Stage Comparison\n", + "\n", + "We can also see how single stage optimization compares to two stage optimization." + ], + "metadata": { + "id": "bDyY-lhT2HAS" + } + }, + { + "cell_type": "code", + "source": [ + "image_size = (112, 112)\n", + "\n", + "# Initialize NaturalImage with 4 channels\n", + "image = opt.images.NaturalImage(image_size, channels=4).to(device)\n", + "\n", + "# Set optimization target\n", + "target = model.mixed4b\n", + "channel_index = 373\n", + "\n", + "# Set optimization target\n", + "loss_fn = opt.loss.NeuronActivation(target, channel_index=channel_index)\n", + "\n", + "# Setup transforms, & blend the alpha channel into the image using random backgrounds\n", + "transforms = [opt.transforms.TransformationRobustness(), opt.transforms.BlendAlpha()]\n", + "\n", + "# Use transformed output as target\n", + "loss_fn = loss_fn * (1.0 - opt.loss.ChannelActivation(transforms[0], channel_index=3))\n", + "\n", + "\n", + "# Render visualization\n", + "neuron_img, history_advanced = visualize(\n", + " model, loss_fn, image, transforms=transforms, n_iter=512\n", + ")\n", + "\n", + "# Show single stage visualization on multiple backgrounds\n", + "print(\"Single Stage Visualization\")\n", + "opt.images.show(create_mosaic(neuron_img), images_per_row=4, figsize=(15, 10))\n", + "\n", + "# Show two stage visualization on multiple backgrounds\n", + "print(\"Two Stage Visualization\")\n", + "opt.images.show(create_mosaic(stage_two_img), images_per_row=4, figsize=(15, 10))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 537, + "referenced_widgets": [ + "95d38ecf0e3f42d285b3b72179601f70", + "323d89c37c62400ca33f194b44ae74d0", + "baf6d0f46126420395bd64ec76a704d6", + "0c99c38f17544da997a575538dd2e5f0", + "4d3ba63fda70437a9bc0770e6214f1c6", + 
"99f161d1f27144ec8721c8dd6e841da6", + "6742449d54ea4997b5b85082b7d12efd", + "9ad0d9e48e7a4a7ba7f66cec35a8eacd", + "d7c6b875af764e0a9aac393bb539acf3", + "27d1bfac70e64b04925375e57162aaae", + "cf4d1a9836814fab81ca7688a66d5fab" + ] + }, + "id": "VkQG2GCrS54d", + "outputId": "24623a4b-050f-47e1-a98c-35bc71d5e39f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/512 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Two Stage Visualization\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "You can see that using two stage visualization can help reveal important areas of the visualization that the single stage misses, while producing better quality visualizations." + ], + "metadata": { + "id": "ZkupbmiqOFuw" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Optimization with alpha channel blur\n", + "\n", + "In more recent research like [Goh, et al., \"Multimodal Neurons in Artificial Neural Networks\", Distill, 2021.](https://distill.pub/2021/multimodal-neurons/), alpha transparency optimization has been performed by using blurring penalties.\n", + "\n", + "Below we define a blurring penalty objective called `BlurActivations`, and a second penalty objective called `MeanAlphaChannelPenalty`." + ], + "metadata": { + "id": "TNEviEvlLTXj" + } + }, + { + "cell_type": "code", + "source": [ + "@opt.loss.loss_wrapper\n", + "class MeanAlphaChannelPenalty(opt.loss.BaseLoss):\n", + " \"\"\"\n", + " Mean alpha channel loss penalty for optimizing with transparency.\n", + "\n", + " This objective is essentially the same thing as taking the square root of the\n", + " DeepDream objective, but only for the alpha channel. 
The square root of the output\n", + " is then calculated.\n", + "\n", + " Basically the same as this, but for the alpha channel only:\n", + " loss_fn = DeepDream(target) ** (1/2)\n", + "\n", + " Used in the https://distill.pub/2021/multimodal-neurons/ paper for optimizing with\n", + " transparency, in the supplementary code here:\n", + " https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " target: torch.nn.Module,\n", + " batch_index: Optional[int] = None,\n", + " ) -> None:\n", + " \"\"\"\n", + " Args:\n", + "\n", + " target (nn.Module): A target layer instance.\n", + " batch_index (int, optional): The index of activations to optimize if\n", + " optimizing a batch of activations. If set to None, defaults to all\n", + " activations in the batch.\n", + " Default: None\n", + " \"\"\"\n", + " opt.loss.BaseLoss.__init__(self, target, batch_index)\n", + "\n", + " def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:\n", + " activations = targets_to_values[self.target]\n", + " assert activations.dim() == 4 and activations.shape[1] == 4\n", + " activations = activations[self.batch_index[0] : self.batch_index[1]]\n", + " return torch.sqrt(torch.mean(activations[:, 3:] ** 2))\n", + "\n", + "\n", + "def _conv_blur(x: torch.Tensor, k: int = 3) -> torch.Tensor:\n", + " \"\"\"\n", + " Blur an input tensor, as per the Lucid supplementary code for\n", + " Olah, et al., \"Feature Visualization\", Distill, 2017:\n", + " https://distill.pub/2017/feature-visualization/\n", + "\n", + " See here for more details:\n", + " https://github.com/tensorflow/lucid/blob/master/lucid/optvis/objectives.py#L261\n", + "\n", + " Args:\n", + "\n", + " x (torch.Tensor): A NCHW tensor to blur.\n", + " k (int, optional): The desired filter height / width to use.\n", + "\n", + " Returns:\n", + " x (torch.Tensor): A blurred version of the input tensor.\n", + " \"\"\"\n", + " assert x.dim() == 4\n", 
+ " channels = x.shape[1]\n", + " k = torch.zeros([channels, channels, k, k], device=x.device)\n", + " for ch in range(channels):\n", + " k_ch = k[ch, ch, :, :]\n", + " k_ch[:, :] = 0.5\n", + " k_ch[1:-1, 1:-1] = 1.0\n", + " return F.conv2d(x, k, padding=\"same\") / F.conv2d(\n", + " torch.ones_like(x), k, padding=\"same\"\n", + " )\n", + "\n", + "\n", + "@opt.loss.loss_wrapper\n", + "class BlurActivations(opt.loss.BaseLoss):\n", + " \"\"\"\n", + " This objective was used in early feature visualization research, and more recently\n", + " for alpha channel optimization.\n", + "\n", + " Used in the https://distill.pub/2021/multimodal-neurons/ paper for optimizing with\n", + " transparency, in the supplementary code here:\n", + " https://github.com/openai/CLIP-featurevis/blob/master/example_facets.py\n", + "\n", + " See Nguyen, et al., 2015 for the origins of the idea:\n", + " https://arxiv.org/abs/1412.1897\n", + " \"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " target: torch.nn.Module,\n", + " channel_index: Optional[int] = None,\n", + " blur_fn: Optional[Callable] = None,\n", + " batch_index: Optional[int] = None,\n", + " ) -> None:\n", + " \"\"\"\n", + " Args:\n", + "\n", + " target (nn.Module): A target layer instance.\n", + " channel_index (int, optional): Optionally only blur a specific channel.\n", + " If set to None, all channels will be blurred.\n", + " Default: None\n", + " blur_fn (Callable, optional): A function or class instance that blurs\n", + " input tensors. If set to None, the _conv_blur function is used.\n", + " Default: None\n", + " batch_index (int, optional): The index of activations to optimize if\n", + " optimizing a batch of activations. 
If set to None, defaults to all\n", + " activations in the batch.\n", + " Default: None\n", + " \"\"\"\n", + " opt.loss.BaseLoss.__init__(self, target, batch_index)\n", + " self.channel_index = channel_index\n", + " self.blur_fn = blur_fn or _conv_blur\n", + "\n", + " def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor:\n", + " activations = targets_to_values[self.target]\n", + " activations = activations[self.batch_index[0] : self.batch_index[1]]\n", + " if self.channel_index is not None:\n", + " activations = activations[:, self.channel_index : self.channel_index + 1]\n", + " activations_blurred = self.blur_fn(activations.detach())\n", + " return 0.5 * torch.sum((activations - activations_blurred) ** 2)" + ], + "metadata": { + "id": "2kzA7TMvLTqb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We render the results using our custom loss objectives." + ], + "metadata": { + "id": "q7qEk9SLc1RC" + } + }, + { + "cell_type": "code", + "source": [ + "image_size = (112, 112)\n", + "\n", + "# Initialize NaturalImage with 4 channels\n", + "image = opt.images.NaturalImage(image_size, channels=4).to(device)\n", + "\n", + "# Set optimization target\n", + "target = model.mixed4b\n", + "channel_index = 373\n", + "\n", + "# Setup main loss objective\n", + "loss_fn = opt.loss.NeuronActivation(target, channel_index=channel_index)\n", + "\n", + "# Setup transforms, & blend the alpha channel into the image using random backgrounds\n", + "transforms = [opt.transforms.TransformationRobustness(), opt.transforms.BlendAlpha()]\n", + "\n", + "# Use transformed output as target for additional loss objectives\n", + "loss_fn = loss_fn - MeanAlphaChannelPenalty(transforms[0])\n", + "loss_fn = loss_fn - (9 * BlurActivations(transforms[0], channel_index=3))\n", + "\n", + "\n", + "# Render visualization\n", + "neuron_img, history_advanced = visualize(\n", + " model, loss_fn, image, transforms=transforms, n_iter=512\n", 
+ ")\n", + "\n", + "\n", + "# Show results\n", + "opt.images.show(create_mosaic(neuron_img), images_per_row=4, figsize=(15, 10))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 275, + "referenced_widgets": [ + "3f4b2348efa0443ab3c29300b85f29e8", + "fe953f251ac24f8b912db5cf4f9864e3", + "5601082b45ce4996acd41e91921243c2", + "82e4a1dbe4944e28bbab6ea2e8ad5661", + "3137aeea1e504d1f842dd8e65667bc70", + "e306b531228a441491fbdfccb9522fdc", + "0317501458264f4e822b3486207f8019", + "aeff5916a0e140e3a254d2bf7e2fd60b", + "6b3d9810d08b4ce190d7c3a801a345e8", + "f06b61f3847b477487f4359bf855c4d1", + "55c305b5b8ed407f972fd2b775a5d18c" + ] + }, + "id": "sRvMrq0UTIRS", + "outputId": "147ad3b3-19d6-45f3-de65-83e917528716" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/512 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can also see that the alpha channel for this visualization is rather different from what is produced by other alpha channel optimization strategies." + ], + "metadata": { + "id": "ZFFsYCR2PfE2" + } + }, + { + "cell_type": "code", + "source": [ + "opt.images.show(composite_alpha_only(neuron_img), figsize=(4, 4))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 248 + }, + "id": "HLRL4zhETRMP", + "outputId": "5b16abae-c5e8-48d3-c176-14a9bbac2a16" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + } + ] +} \ No newline at end of file From e2e58da0a0561dd5b38944d40509e9d981c20570 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 18:48:04 -0600 Subject: [PATCH 009/174] Improve loss objective docs + batch_index --- captum/optim/_core/loss.py | 372 ++++++++++++++++++++++++------------- 1 file changed, 238 insertions(+), 134 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 66bb4c40c2..682e5b44eb 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -141,14 +141,18 @@ class BaseLoss(Loss): def __init__( self, target: Union[nn.Module, List[nn.Module]] = [], - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: super(BaseLoss, self).__init__() self._target = target if batch_index is None: self._batch_index = (None, None) + elif isinstance(batch_index, (list, tuple)): + self._batch_index = tuple(batch_index) else: self._batch_index = (batch_index, batch_index + 1) + assert all([isinstance(b, (int, type(None))) for b in self._batch_index]) + assert len(self._batch_index) == 2 @property def target(self) -> Union[nn.Module, List[nn.Module]]: @@ -197,10 +201,14 @@ class LayerActivation(BaseLoss): their original form. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. Index ranges should be + in the format of: [start, end]. 
+ Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -215,18 +223,26 @@ class ChannelActivation(BaseLoss): Maximize activations at the target layer and target channel. This loss maximizes the activations of a target channel in a specified target layer, and can be useful to determine what features the channel is excited by. - - Args: - target (nn.Module): The layer to containing the channel to optimize for. - channel_index (int): The index of the channel to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( - self, target: nn.Module, channel_index: int, batch_index: Optional[int] = None + self, + target: nn.Module, + channel_index: int, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + channel_index (int): The index of the channel to optimize for. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. Index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -250,19 +266,6 @@ class NeuronActivation(BaseLoss): from the specified layer. This loss is useful for determining the type of features that excite a neuron, and thus is often used for circuits and neuron related research. - - Args: - target (nn.Module): The layer to containing the channel to optimize for. - channel_index (int): The index of the channel to optimize for. - x (int, optional): The x coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit left of center for even - lengths. 
- y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -271,8 +274,27 @@ def __init__( channel_index: int, x: Optional[int] = None, y: Optional[int] = None, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): The layer instance containing the channel to optimize for. + channel_index (int): The index of the channel to optimize for. + x (int, optional): The x coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + unspecified, defaults to center, or one unit up of center for even + heights. + Default: None + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. Index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index self.x = x @@ -305,10 +327,14 @@ class DeepDream(BaseLoss): referred to as 'Deep Dream'. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. 
If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -328,10 +354,14 @@ class TotalVariation(BaseLoss): often used to remove unwanted visual artifacts. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -346,22 +376,26 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: class L1(BaseLoss): """ L1 norm of the target layer, generally used as a penalty. - - Args: - target (nn.Module): The layer to optimize for. - constant (float): Constant threshold to deduct from the activations. - Defaults to 0. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( self, target: nn.Module, constant: float = 0.0, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + constant (float): Constant threshold to deduct from the activations. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. 
If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -375,34 +409,40 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: class L2(BaseLoss): """ L2 norm of the target layer, generally used as a penalty. - - Args: - target (nn.Module): The layer to optimize for. - constant (float): Constant threshold to deduct from the activations. - Defaults to 0. - epsilon (float): Small value to add to L2 prior to sqrt. Defaults to 1e-6. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( self, target: nn.Module, constant: float = 0.0, - epsilon: float = 1e-6, - batch_index: Optional[int] = None, + eps: float = 1e-6, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + constant (float): Constant threshold to deduct from the activations. + Default: 0.0 + eps (float): Small value to add to L2 prior to sqrt. + Default: 1e-6 + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. index ranges should be + in the format of: [start, end]. 
+ Default: None + """ BaseLoss.__init__(self, target, batch_index) self.constant = constant - self.epsilon = epsilon + self.eps = eps def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target][ self.batch_index[0] : self.batch_index[1] ] activations = ((activations - self.constant) ** 2).sum() - return torch.sqrt(self.epsilon + activations) + return torch.sqrt(self.eps + activations) @loss_wrapper @@ -416,13 +456,18 @@ class Diversity(BaseLoss): loss. Args: - target (nn.Module): The layer to optimize for. - batch_index (int, optional): Unused here since we are optimizing for diversity - across the batch. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. index + ranges should be in the format of: [start, end]. + Default: None """ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] batch, channels = activations.shape[:2] flattened = activations.view(batch, channels, -1) grams = torch.matmul(flattened, torch.transpose(flattened, 1, 2)) @@ -446,23 +491,29 @@ class ActivationInterpolation(BaseLoss): https://distill.pub/2017/feature-visualization/#Interaction-between-Neurons This loss helps to interpolate or mix visualizations from two activations (layer or channel) by interpolating a linear sum between the two activations. - - Args: - target1 (nn.Module): The first layer to optimize for. - channel_index1 (int): Index of channel in first layer to optimize. Defaults to - all channels. - target2 (nn.Module): The first layer to optimize for. - channel_index2 (int): Index of channel in first layer to optimize. Defaults to - all channels. 
""" def __init__( self, target1: nn.Module = None, - channel_index1: int = -1, + channel_index1: Optional[int] = None, target2: nn.Module = None, - channel_index2: int = -1, + channel_index2: Optional[int] = None, ) -> None: + """ + Args: + + target1 (nn.Module): The first layer, transform, or image parameterization + instance to optimize the output for. + channel_index1 (int, optional): Index of channel in first target to + optimize. Default is set to None for all channels. + Default: None + target2 (nn.Module): The second layer, transform, or image parameterization + instance to optimize the output for. + channel_index2 (int, optional): Index of channel in second target to + optimize. Default is set to None for all channels. + Default: None + """ self.target_one = target1 self.channel_index_one = channel_index1 self.target_two = target2 @@ -476,15 +527,16 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: assert activations_one is not None and activations_two is not None # ensure channel indices are valid - assert ( - self.channel_index_one < activations_one.shape[1] - and self.channel_index_two < activations_two.shape[1] - ) + if self.channel_index_one: + assert self.channel_index_one < activations_one.shape[1] + if self.channel_index_two: + assert self.channel_index_two < activations_two.shape[1] + assert activations_one.size(0) == activations_two.size(0) - if self.channel_index_one > -1: + if self.channel_index_one: activations_one = activations_one[:, self.channel_index_one] - if self.channel_index_two > -1: + if self.channel_index_two: activations_two = activations_two[:, self.channel_index_two] B = activations_one.size(0) @@ -508,19 +560,35 @@ class Alignment(BaseLoss): When interpolating between activations, it may be desirable to keep image landmarks in the same position for visual comparison. This loss helps to minimize L2 distance between neighbouring images. - - Args: - target (nn.Module): The layer to optimize for. 
- decay_ratio (float): How much to decay penalty as images move apart in batch. - Defaults to 2. """ - def __init__(self, target: nn.Module, decay_ratio: float = 2.0) -> None: - BaseLoss.__init__(self, target) + def __init__( + self, + target: nn.Module, + decay_ratio: float = 2.0, + batch_index: Optional[List[int]] = None, + ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + decay_ratio (float): How much to decay penalty as images move apart in + the batch. + Default: 2.0 + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. + index ranges should be in the format of: [start, end]. + Default: None + """ + if batch_index: + assert len(batch_index) == 2 + BaseLoss.__init__(self, target, batch_index) self.decay_ratio = decay_ratio def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] + activations = activations[self.batch_index[0] : self.batch_index[1]] B = activations.size(0) sum_tensor = torch.zeros(1, device=activations.device) @@ -545,14 +613,6 @@ class Direction(BaseLoss): the alignment between the input vector and the layer’s activation vector. The dimensionality of the vector should correspond to the number of channels in the layer. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. 
""" def __init__( @@ -562,6 +622,19 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) self.cossim_pow = cossim_pow @@ -581,21 +654,6 @@ class NeuronDirection(BaseLoss): https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by focusing on visualizing a single neuron within the kernel. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - x (int, optional): The x coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit left of center for even - lengths. - y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - channel_index (int): The index of the channel to optimize for. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -608,6 +666,30 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. 
+ x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. + Default: None + channel_index (int): The index of the channel to optimize for. If set to + None, then all channels will be used. + Default: None + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) self.x = x @@ -673,16 +755,25 @@ def __init__( ) -> None: """ Args: - target (nn.Module): A target layer instance. + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. vec (torch.Tensor): A neuron direction vector to use. vec_whitened (torch.Tensor, optional): A whitened neuron direction vector. + If set to None, then no whitened vec will be used. + Default: None cossim_pow (float, optional): The desired cosine similarity power to use. - x (int, optional): Optionally provide a specific x position for the target - neuron. - y (int, optional): Optionally provide a specific y position for the target - neuron. + x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. + Default: None eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. 
+ Default: 1.0e-4 """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -726,14 +817,6 @@ class TensorDirection(BaseLoss): Carter, et al., "Activation Atlas", Distill, 2019. https://distill.pub/2019/activation-atlas/#Aggregating-Multiple-Images Extends Direction loss by allowing batch-wise direction visualization. - - Args: - target (nn.Module): The layer to optimize for. - vec (torch.Tensor): Vector representing direction to align to. - cossim_pow (float, optional): The desired cosine similarity power to use. - batch_index (int, optional): The index of the image to optimize if we - optimizing a batch of images. If unspecified, defaults to all images - in the batch. """ def __init__( @@ -743,6 +826,19 @@ def __init__( cossim_pow: Optional[float] = 0.0, batch_index: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + vec (torch.Tensor): Vector representing direction to align to. + cossim_pow (float, optional): The desired cosine similarity power to use. + Default: 0.0 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None + """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 4 self.vec = vec @@ -774,21 +870,6 @@ class ActivationWeights(BaseLoss): Apply weights to channels, neurons, or spots in the target. This loss weighs specific channels or neurons in a given layer, via a weight vector. - - Args: - target (nn.Module): The layer to optimize for. - weights (torch.Tensor): Weights to apply to targets. - neuron (bool): Whether target is a neuron. Defaults to False. - x (int, optional): The x coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit left of center for even - lengths. 
- y (int, optional): The y coordinate of the neuron to optimize for. If - unspecified, defaults to center, or one unit up of center for even - heights. - wx (int, optional): Length of neurons to apply the weights to, along the - x-axis. - wy (int, optional): Length of neurons to apply the weights to, along the - y-axis. """ def __init__( @@ -801,6 +882,29 @@ def __init__( wx: Optional[int] = None, wy: Optional[int] = None, ) -> None: + """ + Args: + + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + weights (torch.Tensor): Weights to apply to targets. + neuron (bool): Whether target is a neuron. + Default: False + x (int, optional): The x coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit left of center for even + lengths. + Default: None + y (int, optional): The y coordinate of the neuron to optimize for. If + set to None, defaults to center, or one unit up of center for even + heights. + Default: None + wx (int, optional): Length of neurons to apply the weights to, along the + x-axis. Set to None for the full length. + Default: None + wy (int, optional): Length of neurons to apply the weights to, along the + y-axis. Set to None for the full length. + Default: None + """ BaseLoss.__init__(self, target) self.x = x self.y = y From c905352ed283524c6c15e24f3218d043716b9e75 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 17 May 2022 18:58:52 -0600 Subject: [PATCH 010/174] Fix NeuronActivation docs --- captum/optim/_core/loss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 682e5b44eb..194422f3f6 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -279,7 +279,8 @@ def __init__( """ Args: - target (nn.Module): The layer instance containing the channel to optimize for. 
+ target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. channel_index (int): The index of the channel to optimize for. x (int, optional): The x coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit left of center for even From a7027286f9c8eee91a5fade3958e6f85d5eee3e7 Mon Sep 17 00:00:00 2001 From: Vivek Miglani Date: Wed, 18 May 2022 10:00:30 -0700 Subject: [PATCH 011/174] SGD Linear Model Fixes for Lime (#938) Summary: This updates SGD linear models to work appropriately with Lime, addressing https://github.com/pytorch/captum/issues/910 . Particularly, this switches Lime interpretable model inputs / outputs from double to float and enables gradients when necessary. Also adds a unit test to Lime for testing with SGD linear models. Pull Request resolved: https://github.com/pytorch/captum/pull/938 Reviewed By: NarineK Differential Revision: D36331146 Pulled By: vivekmig fbshipit-source-id: 84d7aecf293404f9ba0b14c48e8723e0e489b392 --- captum/_utils/models/linear_model/train.py | 139 ++++++++++----------- captum/attr/_core/lime.py | 6 +- tests/attr/test_lime.py | 31 ++++- 3 files changed, 99 insertions(+), 77 deletions(-) diff --git a/captum/_utils/models/linear_model/train.py b/captum/_utils/models/linear_model/train.py index aaf8a2e4bf..30e5edf112 100644 --- a/captum/_utils/models/linear_model/train.py +++ b/captum/_utils/models/linear_model/train.py @@ -99,7 +99,6 @@ def sgd_train_linear_model( This will return the final training loss (averaged with `running_loss_window`) """ - loss_window: List[torch.Tensor] = [] min_avg_loss = None convergence_counter = 0 @@ -145,77 +144,77 @@ def get_point(datapoint): if model.linear.bias is not None: model.linear.bias.zero_() - optim = torch.optim.SGD(model.parameters(), lr=initial_lr) - if reduce_lr: - scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( - optim, factor=0.5, patience=patience, threshold=threshold - ) - - t1 = 
time.time() - epoch = 0 - i = 0 - while epoch < max_epoch: - while True: # for x, y, w in dataloader - if running_loss_window is None: - running_loss_window = x.shape[0] * len(dataloader) - - y = y.view(x.shape[0], -1) - if w is not None: - w = w.view(x.shape[0], -1) - - i += 1 - - out = model(x) - - loss = loss_fn(y, out, w) - if reg_term is not None: - reg = torch.norm(model.linear.weight, p=reg_term) - loss += reg.sum() * alpha - - if len(loss_window) >= running_loss_window: - loss_window = loss_window[1:] - loss_window.append(loss.clone().detach()) - assert len(loss_window) <= running_loss_window - - average_loss = torch.mean(torch.stack(loss_window)) - if min_avg_loss is not None: - # if we haven't improved by at least `threshold` - if average_loss > min_avg_loss or torch.isclose( - min_avg_loss, average_loss, atol=threshold - ): - convergence_counter += 1 - if convergence_counter >= patience: - converged = True - break - else: - convergence_counter = 0 - if min_avg_loss is None or min_avg_loss >= average_loss: - min_avg_loss = average_loss.clone() - - if debug: - print( - f"lr={optim.param_groups[0]['lr']}, Loss={loss}," - + "Aloss={average_loss}, min_avg_loss={min_avg_loss}" - ) - - loss.backward() - - optim.step() - model.zero_grad() - if scheduler: - scheduler.step(average_loss) - - temp = next(data_iter, None) - if temp is None: + with torch.enable_grad(): + optim = torch.optim.SGD(model.parameters(), lr=initial_lr) + if reduce_lr: + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + optim, factor=0.5, patience=patience, threshold=threshold + ) + + t1 = time.time() + epoch = 0 + i = 0 + while epoch < max_epoch: + while True: # for x, y, w in dataloader + if running_loss_window is None: + running_loss_window = x.shape[0] * len(dataloader) + + y = y.view(x.shape[0], -1) + if w is not None: + w = w.view(x.shape[0], -1) + + i += 1 + + out = model(x) + + loss = loss_fn(y, out, w) + if reg_term is not None: + reg = torch.norm(model.linear.weight, 
p=reg_term) + loss += reg.sum() * alpha + + if len(loss_window) >= running_loss_window: + loss_window = loss_window[1:] + loss_window.append(loss.clone().detach()) + assert len(loss_window) <= running_loss_window + + average_loss = torch.mean(torch.stack(loss_window)) + if min_avg_loss is not None: + # if we haven't improved by at least `threshold` + if average_loss > min_avg_loss or torch.isclose( + min_avg_loss, average_loss, atol=threshold + ): + convergence_counter += 1 + if convergence_counter >= patience: + converged = True + break + else: + convergence_counter = 0 + if min_avg_loss is None or min_avg_loss >= average_loss: + min_avg_loss = average_loss.clone() + + if debug: + print( + f"lr={optim.param_groups[0]['lr']}, Loss={loss}," + + "Aloss={average_loss}, min_avg_loss={min_avg_loss}" + ) + + loss.backward() + optim.step() + model.zero_grad() + if scheduler: + scheduler.step(average_loss) + + temp = next(data_iter, None) + if temp is None: + break + x, y, w = get_point(temp) + + if converged: break - x, y, w = get_point(temp) - - if converged: - break - epoch += 1 - data_iter = iter(dataloader) - x, y, w = get_point(next(data_iter)) + epoch += 1 + data_iter = iter(dataloader) + x, y, w = get_point(next(data_iter)) t2 = time.time() return { diff --git a/captum/attr/_core/lime.py b/captum/attr/_core/lime.py index 76f3f4ca71..520251ce53 100644 --- a/captum/attr/_core/lime.py +++ b/captum/attr/_core/lime.py @@ -512,17 +512,17 @@ def attribute( if show_progress: attr_progress.close() - combined_interp_inps = torch.cat(interpretable_inps).double() + combined_interp_inps = torch.cat(interpretable_inps).float() combined_outputs = ( torch.cat(outputs) if len(outputs[0].shape) > 0 else torch.stack(outputs) - ).double() + ).float() combined_sim = ( torch.cat(similarities) if len(similarities[0].shape) > 0 else torch.stack(similarities) - ).double() + ).float() dataset = TensorDataset( combined_interp_inps, combined_outputs, combined_sim ) diff --git 
a/tests/attr/test_lime.py b/tests/attr/test_lime.py index 4287aa05ba..45646c47d7 100644 --- a/tests/attr/test_lime.py +++ b/tests/attr/test_lime.py @@ -3,10 +3,12 @@ import io import unittest import unittest.mock -from typing import Any, Callable, Generator, List, Tuple, Union +from functools import partial +from typing import Any, Callable, Generator, List, Optional, Tuple, Union import torch -from captum._utils.models.linear_model import SkLearnLasso +from captum._utils.models.linear_model import SGDLasso, SkLearnLasso +from captum._utils.models.model import Model from captum._utils.typing import BaselineType, TensorOrTupleOfTensorsGeneric from captum.attr._core.lime import get_exp_kernel_similarity_function, Lime, LimeBase from captum.attr._utils.batching import _batch_example_iterator @@ -120,6 +122,22 @@ def test_simple_lime(self) -> None: test_generator=True, ) + def test_simple_lime_sgd_model(self) -> None: + net = BasicModel_MultiLayer() + inp = torch.tensor([[20.0, 50.0, 30.0]], requires_grad=True) + interpretable_model = SGDLasso() + interpretable_model.fit = partial( # type: ignore + interpretable_model.fit, initial_lr=0.1, max_epoch=500 + ) + self._lime_test_assert( + net, + inp, + [[73.3716, 193.3349, 113.3349]], + n_samples=1000, + expected_coefs_only=[[73.3716, 193.3349, 113.3349]], + interpretable_model=interpretable_model, + ) + def test_simple_lime_with_mask(self) -> None: net = BasicModel_MultiLayer() inp = torch.tensor([[20.0, 50.0, 30.0]], requires_grad=True) @@ -487,12 +505,15 @@ def _lime_test_assert( batch_attr: bool = False, test_generator: bool = False, show_progress: bool = False, + interpretable_model: Optional[Model] = None, ) -> None: for batch_size in perturbations_per_eval: lime = Lime( model, similarity_func=get_exp_kernel_similarity_function("cosine", 10.0), - interpretable_model=SkLearnLasso(alpha=1.0), + interpretable_model=interpretable_model + if interpretable_model + else SkLearnLasso(alpha=1.0), ) attributions = 
lime.attribute( test_input, @@ -526,7 +547,9 @@ def _lime_test_assert( lime_alt = LimeBase( model, - SkLearnLasso(alpha=1.0), + interpretable_model + if interpretable_model + else SkLearnLasso(alpha=1.0), get_exp_kernel_similarity_function("euclidean", 1000.0), alt_perturb_generator if test_generator else alt_perturb_func, False, From 8c28dad6172ea1965b518882a6e30bea17425140 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 18 May 2022 14:12:39 -0600 Subject: [PATCH 012/174] Fix FacetLoss docs --- captum/optim/_core/loss.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1dca3c50ad..94bcdbf9f5 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -981,13 +981,16 @@ def __init__( visualizing targets from. This is normally the penultimate layer of the model. layer_target (nn.Module): A layer that we have facet_weights for. This - target layer should be below the ultimate_target layer in the model. - strength (float, list of float, optional): A list of floats to use for batch - dimension weighting. Default is set to None for no weighting. - Default: None + target layer should be below the ultimate_target layer in the model. + strength (float, list of float, optional): A single float or list of floats + to use for batch dimension weighting. If using a single value, then it + will be applied to all batch dimensions equally. Otherwise a list of + floats with a shape of: [start, end] should be used for torch.linspace + to calculate the step values in between. Default is set to None for no + weighting. facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should - come from linear probes trained on layers in target_layers. + come from linear probes trained on layer_target. batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. 
If set to None, defaults to all activations in the batch. @@ -997,6 +1000,8 @@ def __init__( self.ultimate_target = ultimate_target self.layer_target = layer_target self.vec = vec + if isinstance(strength, (tuple, list)): + assert len(strength) == 2 self.strength = strength assert facet_weights.dim() == 4 or facet_weights.dim() == 2 self.facet_weights = facet_weights From 32fc6936783f839c428c7604fe584666b7fd12bf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 18 May 2022 14:20:53 -0600 Subject: [PATCH 013/174] Improve VectorLoss docs --- captum/optim/_core/loss.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 94bcdbf9f5..cd52f02951 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -916,12 +916,8 @@ def __init__( Args: target (nn.Module): A target layer instance. - vec (torch.Tensor): A direction vector to use, with a compatible shape for - computing the matrix product of the activations. See torch.matmul for - See torch.matmul for more details on compatible shapes: - https://pytorch.org/docs/stable/generated/torch.matmul.html - By default, vec is expected to share the same size as the channel - dimension of the activations. + vec (torch.Tensor): A 1D channel vector with the same size as the + channel / feature dimension of the target layer instance. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set to None, then no activation function will be used. @@ -936,6 +932,7 @@ def __init__( Default: None """ BaseLoss.__init__(self, target, batch_index) + assert vec.dim() == 1 self.vec = vec self.activation_fn = activation_fn self.move_channel_dim_to_final_dim = move_channel_dim_to_final_dim @@ -976,7 +973,8 @@ def __init__( """ Args: - vec (torch.Tensor): A 1D channel vector. 
+ vec (torch.Tensor): A 1D channel vector with the same size as the + channel / feature dimension of ultimate_target. ultimate_target (nn.Module): The main target layer that we are visualizing targets from. This is normally the penultimate layer of the model. @@ -999,6 +997,7 @@ def __init__( BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) self.ultimate_target = ultimate_target self.layer_target = layer_target + assert vec.dim() == 1 self.vec = vec if isinstance(strength, (tuple, list)): assert len(strength) == 2 From c211032eb6afff03cc7acee3533aed614a024711 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 18 May 2022 18:55:30 -0700 Subject: [PATCH 014/174] Fix version check bug (#940) Summary: By default: `"1.8.0" > "1.10.0"` will be equal to True, despite 1.10 being a later version than 1.8.0. This PR fixes this issue. Pull Request resolved: https://github.com/pytorch/captum/pull/940 Reviewed By: NarineK Differential Revision: D36336547 Pulled By: vivekmig fbshipit-source-id: 84f277eb1e6897a8378ce9eb8c9eab3285ad8494 --- tests/utils/test_sample_gradient.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_sample_gradient.py b/tests/utils/test_sample_gradient.py index 8f49235e72..8f8279b678 100644 --- a/tests/utils/test_sample_gradient.py +++ b/tests/utils/test_sample_gradient.py @@ -5,6 +5,7 @@ import torch from captum._utils.sample_gradient import SampleGradientWrapper, SUPPORTED_MODULES +from packaging import version from tests.helpers.basic import assertTensorAlmostEqual, BaseTest from tests.helpers.basic_models import ( BasicModel_ConvNet_One_Conv, @@ -37,7 +38,7 @@ def test_sample_grads_conv_mean_multi_inp(self) -> None: self._compare_sample_grads_per_sample(model, inp, lambda x: torch.mean(x)) def test_sample_grads_modified_conv_mean(self) -> None: - if torch.__version__ < "1.8": + if version.parse(torch.__version__) < version.parse("1.8.0"): raise unittest.SkipTest( "Skipping sample gradient
test with 3D linear module" "since torch version < 1.8" @@ -50,7 +51,7 @@ def test_sample_grads_modified_conv_mean(self) -> None: ) def test_sample_grads_modified_conv_sum(self) -> None: - if torch.__version__ < "1.8": + if version.parse(torch.__version__) < version.parse("1.8.0"): raise unittest.SkipTest( "Skipping sample gradient test with 3D linear module" "since torch version < 1.8" From 7b78aaad80e7d5d411f768c1190b2ead8f738cfd Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 21 May 2022 16:04:09 -0600 Subject: [PATCH 015/174] Improve loss objective testing * Ensure testing coverage is as high as possible. * Simplified code with new `rmodule_op` function. * Removed the NumPy import from loss testing. --- captum/optim/_core/loss.py | 59 ++--- tests/optim/core/test_loss.py | 472 ++++++++++++++++++++++++++++++---- 2 files changed, 453 insertions(+), 78 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 66bb4c40c2..1365537c19 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -64,40 +64,10 @@ def __rmul__(self, other: Union[int, float, "Loss"]) -> "CompositeLoss": return self.__mul__(other) def __rtruediv__(self, other: Union[int, float, "Loss"]) -> "CompositeLoss": - if isinstance(other, (int, float)): - - def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: - return operator.truediv(other, torch.mean(self(module))) - - name = self.__name__ - target = self.target - elif isinstance(other, Loss): - # This should never get called because __div__ will be called instead - pass - else: - raise TypeError( - "Can only apply math operations with int, float or Loss. 
Received type " - + str(type(other)) - ) - return CompositeLoss(loss_fn, name=name, target=target) + return rmodule_op(self, other, operator.truediv) def __rpow__(self, other: Union[int, float, "Loss"]) -> "CompositeLoss": - if isinstance(other, (int, float)): - - def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: - return operator.pow(other, torch.mean(self(module))) - - name = self.__name__ - target = self.target - elif isinstance(other, Loss): - # This should never get called because __pow__ will be called instead - pass - else: - raise TypeError( - "Can only apply math operations with int, float or Loss. Received type " - + str(type(other)) - ) - return CompositeLoss(loss_fn, name=name, target=target) + return rmodule_op(self, other, operator.pow) def module_op( @@ -137,6 +107,31 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: return CompositeLoss(loss_fn, name=name, target=target) +def rmodule_op( + self: Loss, other: Union[int, float, Loss], math_op: Callable +) -> "CompositeLoss": + """ + This is a general function for applying the "r" versions of math operations to + Losses. + """ + if isinstance(other, (int, float)): + + def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + return math_op(other, self(module)) + + name = self.__name__ + target = self.target + elif isinstance(other, Loss): + # This should never get called because __math_op__ will be called instead + pass + else: + raise TypeError( + "Can only apply math operations with int, float or Loss. 
Received type " + + str(type(other)) + ) + return CompositeLoss(loss_fn, name=name, target=target) + + class BaseLoss(Loss): def __init__( self, diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 49c35ed9d4..fa24bd9337 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 +import operator import unittest -from typing import cast, List, Union +from typing import cast, Any, List, Optional, Type, Union import captum.optim._core.loss as opt_loss -import numpy as np import torch from captum.optim.models import collect_activations from packaging import version @@ -16,7 +16,7 @@ def get_loss_value( model: torch.nn.Module, loss: opt_loss.Loss, input_shape: List[int] = [1, 3, 1, 1] -) -> Union[int, float, np.ndarray]: +) -> Union[int, float]: module_outputs = collect_activations(model, loss.target, torch.ones(*input_shape)) loss_value = loss(module_outputs) try: @@ -36,6 +36,12 @@ def test_channel_deepdream(self) -> None: class TestChannelActivation(BaseTest): + def test_channel_activation_init(self) -> None: + model = torch.nn.Identity() + channel_index = 5 + loss = opt_loss.ChannelActivation(model, channel_index=channel_index) + self.assertEqual(loss.channel_index, channel_index) + def test_channel_activation_0(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) @@ -52,6 +58,14 @@ def test_channel_activation_1(self) -> None: class TestNeuronActivation(BaseTest): + def test_neuron_activation_init(self) -> None: + model = torch.nn.Identity() + channel_index = 5 + loss = opt_loss.NeuronActivation(model, channel_index=channel_index) + self.assertEqual(loss.channel_index, channel_index) + self.assertIsNone(loss.x) + self.assertIsNone(loss.y) + def test_neuron_activation_0(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.NeuronActivation(model.layer, 0) @@ -68,6 +82,11 @@ def test_total_variation(self) -> None: class 
TestL1(BaseTest): + def test_l1_init(self) -> None: + model = torch.nn.Identity() + loss = opt_loss.L1(model) + self.assertEqual(loss.constant, 0.0) + def test_l1(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.L1(model.layer) @@ -79,6 +98,12 @@ def test_l1(self) -> None: class TestL2(BaseTest): + def test_l2_init(self) -> None: + model = torch.nn.Identity() + loss = opt_loss.L2(model) + self.assertEqual(loss.constant, 0.0) + self.assertEqual(loss.epsilon, 1e-6) + def test_l2(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.L2(model.layer) @@ -129,54 +154,140 @@ def test_alignment(self) -> None: ) +class TestDirection(BaseTest): + def test_direction_init(self) -> None: + model = torch.nn.Identity() + vec = torch.ones(2) * 0.5 + loss = opt_loss.Direction(model, vec=vec) + self.assertEqual(list(loss.vec.shape), [1, 2, 1, 1]) + assertTensorAlmostEqual(self, loss.vec, vec.reshape((1, -1, 1, 1)), delta=0.0) + self.assertEqual(loss.cossim_pow, 0.0) + + def test_direction(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.ones(2) + loss = opt_loss.Direction(model.layer, vec=torch.ones(2)) + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) + dot = torch.sum(vec.reshape((1, -1, 1, 1)) * b.reshape((1, -1, 1, 1)), 1) + self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) + + class TestNeuronDirection(BaseTest): + def test_neuron_direction_init(self) -> None: + model = torch.nn.Identity() + vec = torch.ones(2) * 0.5 + loss = opt_loss.NeuronDirection(model, vec=vec) + self.assertIsNone(loss.x) + self.assertIsNone(loss.y) + self.assertIsNone(loss.channel_index) + self.assertEqual(loss.cossim_pow, 0.0) + self.assertEqual(list(loss.vec.shape), [1, 2, 1, 1]) + assertTensorAlmostEqual(self, loss.vec, vec.reshape((1, -1, 1, 1)), delta=0.0) + def test_neuron_direction(self) -> None: model = BasicModel_ConvNet_Optim() - loss = opt_loss.NeuronDirection(model.layer, vec=torch.ones(1, 1, 1, 
1)) - a = 1 - b = [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] - dot = np.sum(np.inner(a, b)) - self.assertAlmostEqual(get_loss_value(model, loss), dot, places=6) + vec = torch.ones(2) + loss = opt_loss.NeuronDirection(model.layer, vec=vec) + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) + dot = torch.sum(b * vec) + self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) + + def test_neuron_direction_channel_index(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.ones(2) + loss = opt_loss.NeuronDirection(model.layer, vec=vec, channel_index=0) + + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) + dot = torch.sum(b * vec) + self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) class TestAngledNeuronDirection(BaseTest): - def test_angled_neuron_direction(self) -> None: - model = BasicModel_ConvNet_Optim() + def test_neuron_activation_init(self) -> None: + model = torch.nn.Identity() + vec = torch.ones(1, 2) * 0.5 loss = opt_loss.AngledNeuronDirection( - model.layer, vec=torch.ones(1, 2), cossim_pow=0 + model, + vec=vec, ) - a = 1 - b = [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] - dot = torch.sum(torch.as_tensor(np.inner(a, b))).item() + self.assertEqual(loss.eps, 1.0e-4) + self.assertEqual(loss.cossim_pow, 4.0) + self.assertIsNone(loss.x) + self.assertIsNone(loss.y) + self.assertIsNone(loss.vec_whitened) + assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + + def test_angled_neuron_direction(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.ones(1, 2) + loss = opt_loss.AngledNeuronDirection(model.layer, vec=vec, cossim_pow=0) + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) + dot = torch.sum(b * vec).item() output = torch.sum(cast(torch.Tensor, get_loss_value(model, loss))) self.assertAlmostEqual(output.item(), dot, places=6) def test_angled_neuron_direction_whitened(self) -> None: 
model = BasicModel_ConvNet_Optim() + vec = torch.ones(1, 2) loss = opt_loss.AngledNeuronDirection( model.layer, - vec=torch.ones(1, 2), + vec=vec, vec_whitened=torch.ones(2, 2), cossim_pow=0, ) - a = 1 - b = [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] - dot = torch.sum(torch.as_tensor(np.inner(a, b))).item() * 2 + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) + dot = torch.sum(vec * b).item() * 2 output = torch.sum(cast(torch.Tensor, get_loss_value(model, loss))) self.assertAlmostEqual(output.item(), dot, places=6) + def test_angled_neuron_direction_cossim_pow_4(self) -> None: + model = BasicModel_ConvNet_Optim() + cossim_pow = 4.0 + vec = torch.ones(1, 2) + loss = opt_loss.AngledNeuronDirection( + model.layer, vec=vec, cossim_pow=cossim_pow + ) + a = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS])[ + None, : + ] + + dot = torch.mean(a * vec) + cossims = dot / (1.0e-4 + torch.sqrt(torch.sum(a**2))) + dot = dot * torch.clamp(cossims, min=0.1) ** cossim_pow + + output = get_loss_value(model, loss) + self.assertAlmostEqual(output, dot.item(), places=6) + class TestTensorDirection(BaseTest): + def test_tensor_init(self) -> None: + model = BasicModel_ConvNet_Optim() + vec = torch.ones(1, 1, 1, 1) + loss = opt_loss.TensorDirection(model.layer, vec=vec) + self.assertEqual(loss.cossim_pow, 0.0) + assertTensorAlmostEqual(self, loss.vec, vec, delta=0.0) + def test_tensor_direction(self) -> None: model = BasicModel_ConvNet_Optim() - loss = opt_loss.TensorDirection(model.layer, vec=torch.ones(1, 1, 1, 1)) - a = 1 - b = [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] - dot = np.sum(np.inner(a, b)) + vec = torch.ones(1, 1, 1, 1) + loss = opt_loss.TensorDirection(model.layer, vec=vec) + b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) + dot = torch.sum(b[None, :, None, None] * vec).item() self.assertAlmostEqual(get_loss_value(model, loss), dot, places=6) class 
TestActivationWeights(BaseTest): + def test_neuron_activation_init(self) -> None: + model = torch.nn.Identity() + weights = torch.zeros(1) + loss = opt_loss.ActivationWeights(model, weights=weights) + self.assertIsNone(loss.x) + self.assertIsNone(loss.y) + self.assertIsNone(loss.wx) + self.assertIsNone(loss.wy) + self.assertFalse(loss.neuron) + assertTensorAlmostEqual(self, loss.weights, weights, delta=0.0) + def test_activation_weights_0(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ActivationWeights(model.layer, weights=torch.zeros(1)) @@ -196,8 +307,89 @@ def test_activation_weights_1(self) -> None: mode="max", ) + def test_activation_weights_neuron_1(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ActivationWeights( + model.layer, weights=torch.ones(1), neuron=True, x=0, y=0, wx=1, wy=1 + ) + assertTensorAlmostEqual( + self, + get_loss_value(model, loss), + torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS])[ + None, :, None, None + ], + mode="max", + ) + + +class _OverrideAbstractFunctions: + """ + Context manager for testing classes with abstract functions. + + Examples:: + >>> # Overriding the abstract methods in BaseLoss + >>> with _OverrideAbstractFunctions(path.to.classtype): + >>> # Do stuff with + """ + + def __init__(self, class_type: Type) -> None: + """ + Args: + + class_type (type): The path to the library class type. 
+ """ + self.class_type = class_type + + def __enter__(self) -> None: + self.abstract_methods = self.class_type.__abstractmethods__ + self.class_type.__abstractmethods__ = frozenset() + + def __exit__(self, *args: Any) -> None: + self.class_type.__abstractmethods__ = self.abstract_methods + + +class TestLoss(BaseTest): + def test_loss_init(self) -> None: + with _OverrideAbstractFunctions(opt_loss.Loss): + loss = opt_loss.Loss() + self.assertIsNone(loss.target) + self.assertEqual(opt_loss.Loss.__name__, "Loss") + + +class TestBaseLoss(BaseTest): + def test_subclass(self) -> None: + self.assertTrue(issubclass(opt_loss.BaseLoss, opt_loss.Loss)) + + def test_base_loss_init(self) -> None: + model = torch.nn.Identity() + with _OverrideAbstractFunctions(opt_loss.BaseLoss): + loss = opt_loss.BaseLoss(model) + self.assertEqual(loss._batch_index, (None, None)) + self.assertEqual(loss.batch_index, (None, None)) + self.assertEqual(loss._target, model) + self.assertEqual(loss.target, model) + + def test_base_loss_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 5 + with _OverrideAbstractFunctions(opt_loss.BaseLoss): + loss = opt_loss.BaseLoss(model, batch_index=batch_index) + self.assertEqual(loss._batch_index, (batch_index, batch_index + 1)) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + + def test_base_loss_target_list(self) -> None: + model = torch.nn.Sequential(torch.nn.Identity(), torch.nn.Identity()) + targets = [model[0], model[1]] + with _OverrideAbstractFunctions(opt_loss.BaseLoss): + loss = opt_loss.BaseLoss(targets) + self.assertEqual(loss._target, targets) + self.assertEqual(loss.target, targets) + class TestCompositeLoss(BaseTest): + def test_subclass(self) -> None: + self.assertTrue(issubclass(opt_loss.CompositeLoss, opt_loss.BaseLoss)) + def test_negative(self) -> None: model = BasicModel_ConvNet_Optim() loss = -opt_loss.ChannelActivation(model.layer, 0) @@ -218,6 +410,15 @@ def test_addition(self) -> None: 
places=6, ) + def test_radd(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = 1.0 + opt_loss.ChannelActivation(model.layer, 0) + self.assertAlmostEqual( + get_loss_value(model, loss), + CHANNEL_ACTIVATION_0_LOSS + 1.0, + places=6, + ) + def test_subtraction(self) -> None: model = BasicModel_ConvNet_Optim() loss = ( @@ -230,6 +431,25 @@ def test_subtraction(self) -> None: CHANNEL_ACTIVATION_0_LOSS - CHANNEL_ACTIVATION_1_LOSS - 1, ) + def test_rsub(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = 1.0 - opt_loss.ChannelActivation(model.layer, 0) + self.assertAlmostEqual( + get_loss_value(model, loss), + 1.0 - CHANNEL_ACTIVATION_0_LOSS, + ) + + def test_multiplication_loss_type(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) * opt_loss.ChannelActivation( + model.layer, 1 + ) + self.assertAlmostEqual( + get_loss_value(model, loss), + CHANNEL_ACTIVATION_0_LOSS * CHANNEL_ACTIVATION_0_LOSS, + places=5, + ) + def test_multiplication(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) * 10 @@ -237,14 +457,32 @@ def test_multiplication(self) -> None: get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS * 10, places=5 ) - # def test_multiplication_error(self) -> None: - # model = BasicModel_ConvNet_Optim() - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) * "string" - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) * opt_loss.ChannelActivation( - # model.layer, 1 - # ) + def test_multiplication_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + opt_loss.ChannelActivation(model.layer, 0) * "string" + + def test_rmul(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = 10 * opt_loss.ChannelActivation(model.layer, 0) + self.assertAlmostEqual( + get_loss_value(model, loss), 10 * CHANNEL_ACTIVATION_0_LOSS, places=5 + ) + + def 
test_rmul_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + "string" * opt_loss.ChannelActivation(model.layer, 0) + + def test_division_loss_type(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) / opt_loss.ChannelActivation( + model.layer, 1 + ) + self.assertAlmostEqual( + get_loss_value(model, loss), + CHANNEL_ACTIVATION_0_LOSS / CHANNEL_ACTIVATION_0_LOSS, + ) def test_division(self) -> None: model = BasicModel_ConvNet_Optim() @@ -253,14 +491,35 @@ def test_division(self) -> None: get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS / 10 ) - # def test_division_error(self) -> None: - # model = BasicModel_ConvNet_Optim() - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) / "string" - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) / opt_loss.ChannelActivation( - # model.layer, 1 - # ) + def test_division_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + opt_loss.ChannelActivation(model.layer, 0) / "string" + + def test_rdiv(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = 10.0 / opt_loss.ChannelActivation(model.layer, 0) + self.assertAlmostEqual( + get_loss_value(model, loss), + 10.0 / CHANNEL_ACTIVATION_0_LOSS, + places=6, + ) + + def test_rdiv_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + "string" / opt_loss.ChannelActivation(model.layer, 0) + + def test_pow_loss_type(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) ** opt_loss.ChannelActivation( + model.layer, 1 + ) + self.assertAlmostEqual( + get_loss_value(model, loss), + CHANNEL_ACTIVATION_0_LOSS**CHANNEL_ACTIVATION_0_LOSS, + places=6, + ) def test_pow(self) -> None: model = BasicModel_ConvNet_Optim() @@ -271,14 +530,24 @@ def test_pow(self) -> None: places=6, ) - # def 
test_pow_error(self) -> None: - # model = BasicModel_ConvNet_Optim() - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) ** "string" - # with self.assertRaises(TypeError): - # opt_loss.ChannelActivation(model.layer, 0) ** opt_loss.ChannelActivation( - # model.layer, 1 - # ) + def test_pow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + opt_loss.ChannelActivation(model.layer, 0) ** "string" + + def test_rpow(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = 2.0 ** opt_loss.ChannelActivation(model.layer, 0) + self.assertAlmostEqual( + get_loss_value(model, loss), + 2.0**CHANNEL_ACTIVATION_0_LOSS, + places=6, + ) + + def test_rpow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + "string" ** opt_loss.ChannelActivation(model.layer, 0) def test_sum_loss_list(self) -> None: n_batch = 400 @@ -295,3 +564,114 @@ def test_sum_loss_list_compose_add(self) -> None: loss_fn = opt_loss.sum_loss_list(loss_fn_list) + opt_loss.LayerActivation(model) out = get_loss_value(model, loss_fn, [n_batch, 3, 1, 1]) self.assertEqual(out, float(n_batch + 1.0)) + + +class TestModuleOP(BaseTest): + def test_module_op_loss_unary_op(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.module_op(loss, None, operator.neg) + + expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" + self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = -torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS]).sum().item() + self.assertEqual(output, expected) + + def test_module_op_loss_num_add(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.module_op(loss, 1.0, operator.add) + + expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" + 
self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = torch.tensor([CHANNEL_ACTIVATION_0_LOSS]) + 1.0 + self.assertEqual(output, expected.item()) + + def test_module_op_loss_loss_add(self) -> None: + model = BasicModel_ConvNet_Optim() + loss1 = opt_loss.ChannelActivation(model.layer, 0) + loss2 = opt_loss.ChannelActivation(model.layer, 1) + composed_loss = opt_loss.module_op(loss1, loss2, operator.add) + + expected_name = ( + "Compose(ChannelActivation [Conv2d(3, 2, ke..., 0], " + + "ChannelActivation [Conv2d(3, 2, ke..., 1])" + ) + self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = ( + torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) + .sum() + .item() + ) + self.assertEqual(output, expected) + + def test_module_op_loss_pow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + opt_loss.module_op( + opt_loss.ChannelActivation(model.layer, 0), "string", operator.pow + ) + + +class TestRModuleOP(BaseTest): + def test_module_op_loss_num_div(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.rmodule_op(loss, 1.0, operator.pow) + + output = get_loss_value(model, composed_loss) + self.assertEqual(output, 1.0**CHANNEL_ACTIVATION_0_LOSS) + + def test_rmodule_op_loss_pow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + opt_loss.rmodule_op( + opt_loss.ChannelActivation(model.layer, 0), "string", operator.pow + ) + + +class TestDefaultLossSummarize(BaseTest): + def test_default_loss_summarize(self) -> None: + x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() + output = opt_loss.default_loss_summarize(x) + self.assertEqual(output.item(), -37.0) + + +class TestMakeArgStr(BaseTest): + def test_make_arg_str(self) -> None: + args = {"a": 5, "b": None} + 
output = opt_loss._make_arg_str(args) + self.assertEqual(output, "{'a': 5, 'b': N...") + args = {"c": torch.nn.Identity, "d": "test"} + output = opt_loss._make_arg_str(args) + self.assertEqual(output, "{'c': None: + @opt_loss.loss_wrapper + class TestClass: + def __init__( + self, + target: torch.nn.Module, + test_var: int, + batch_index: Optional[int] = None, + ) -> None: + self.target = target + self.batch_index = batch_index + self.test_var = test_var + + def __call__(self) -> int: + return self.test_var + + test_module = TestClass(torch.nn.Identity(), test_var=5, batch_index=0) + self.assertEqual(test_module.__name__, "TestClass [Identity()]") + + test_module = TestClass(torch.nn.Identity(), 5, 0) + self.assertEqual(test_module.__name__, "TestClass [Identity(), 5, 0]") + + test_module = TestClass(torch.nn.Identity(), 5) + self.assertEqual(test_module.__name__, "TestClass [Identity(), 5]") From 69a73b2f958d57cca357fe9984819d62f8aae9fd Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 21 May 2022 16:23:32 -0600 Subject: [PATCH 016/174] Fix mypy test errors --- tests/optim/core/test_loss.py | 41 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index fa24bd9337..47b421d629 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -16,7 +16,7 @@ def get_loss_value( model: torch.nn.Module, loss: opt_loss.Loss, input_shape: List[int] = [1, 3, 1, 1] -) -> Union[int, float]: +) -> Union[int, float, torch.Tensor]: module_outputs = collect_activations(model, loss.target, torch.ones(*input_shape)) loss_value = loss(module_outputs) try: @@ -351,7 +351,7 @@ def __exit__(self, *args: Any) -> None: class TestLoss(BaseTest): def test_loss_init(self) -> None: with _OverrideAbstractFunctions(opt_loss.Loss): - loss = opt_loss.Loss() + loss = opt_loss.Loss() # type: ignore self.assertIsNone(loss.target) self.assertEqual(opt_loss.Loss.__name__, 
"Loss") @@ -363,7 +363,7 @@ def test_subclass(self) -> None: def test_base_loss_init(self) -> None: model = torch.nn.Identity() with _OverrideAbstractFunctions(opt_loss.BaseLoss): - loss = opt_loss.BaseLoss(model) + loss = opt_loss.BaseLoss(model) # type: ignore self.assertEqual(loss._batch_index, (None, None)) self.assertEqual(loss.batch_index, (None, None)) self.assertEqual(loss._target, model) @@ -373,7 +373,7 @@ def test_base_loss_batch_index(self) -> None: model = torch.nn.Identity() batch_index = 5 with _OverrideAbstractFunctions(opt_loss.BaseLoss): - loss = opt_loss.BaseLoss(model, batch_index=batch_index) + loss = opt_loss.BaseLoss(model, batch_index=batch_index) # type: ignore self.assertEqual(loss._batch_index, (batch_index, batch_index + 1)) self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) @@ -381,7 +381,7 @@ def test_base_loss_target_list(self) -> None: model = torch.nn.Sequential(torch.nn.Identity(), torch.nn.Identity()) targets = [model[0], model[1]] with _OverrideAbstractFunctions(opt_loss.BaseLoss): - loss = opt_loss.BaseLoss(targets) + loss = opt_loss.BaseLoss(targets) # type: ignore self.assertEqual(loss._target, targets) self.assertEqual(loss.target, targets) @@ -460,7 +460,7 @@ def test_multiplication(self) -> None: def test_multiplication_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - opt_loss.ChannelActivation(model.layer, 0) * "string" + opt_loss.ChannelActivation(model.layer, 0) * "string" # type: ignore def test_rmul(self) -> None: model = BasicModel_ConvNet_Optim() @@ -472,7 +472,7 @@ def test_rmul(self) -> None: def test_rmul_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - "string" * opt_loss.ChannelActivation(model.layer, 0) + "string" * opt_loss.ChannelActivation(model.layer, 0) # type: ignore def test_division_loss_type(self) -> None: model = BasicModel_ConvNet_Optim() @@ -494,7 +494,7 @@ def test_division(self) -> None: def 
test_division_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - opt_loss.ChannelActivation(model.layer, 0) / "string" + opt_loss.ChannelActivation(model.layer, 0) / "string" # type: ignore def test_rdiv(self) -> None: model = BasicModel_ConvNet_Optim() @@ -508,7 +508,7 @@ def test_rdiv(self) -> None: def test_rdiv_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - "string" / opt_loss.ChannelActivation(model.layer, 0) + "string" / opt_loss.ChannelActivation(model.layer, 0) # type: ignore def test_pow_loss_type(self) -> None: model = BasicModel_ConvNet_Optim() @@ -533,7 +533,7 @@ def test_pow(self) -> None: def test_pow_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - opt_loss.ChannelActivation(model.layer, 0) ** "string" + opt_loss.ChannelActivation(model.layer, 0) ** "string" # type: ignore def test_rpow(self) -> None: model = BasicModel_ConvNet_Optim() @@ -547,7 +547,7 @@ def test_rpow(self) -> None: def test_rpow_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - "string" ** opt_loss.ChannelActivation(model.layer, 0) + "string" ** opt_loss.ChannelActivation(model.layer, 0) # type: ignore def test_sum_loss_list(self) -> None: n_batch = 400 @@ -611,9 +611,8 @@ def test_module_op_loss_loss_add(self) -> None: def test_module_op_loss_pow_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - opt_loss.module_op( - opt_loss.ChannelActivation(model.layer, 0), "string", operator.pow - ) + loss = opt_loss.ChannelActivation(model.layer, 0) + opt_loss.module_op(loss, "string", operator.pow) # type: ignore class TestRModuleOP(BaseTest): @@ -628,9 +627,8 @@ def test_module_op_loss_num_div(self) -> None: def test_rmodule_op_loss_pow_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - opt_loss.rmodule_op( - 
opt_loss.ChannelActivation(model.layer, 0), "string", operator.pow - ) + loss = pt_loss.ChannelActivation(model.layer, 0) + opt_loss.rmodule_op(loss, "string", operator.pow) # type: ignore class TestDefaultLossSummarize(BaseTest): @@ -668,10 +666,13 @@ def __call__(self) -> int: return self.test_var test_module = TestClass(torch.nn.Identity(), test_var=5, batch_index=0) - self.assertEqual(test_module.__name__, "TestClass [Identity()]") + expected = "TestClass [Identity()]" + self.assertEqual(test_module.__name__, expected) # type: ignore test_module = TestClass(torch.nn.Identity(), 5, 0) - self.assertEqual(test_module.__name__, "TestClass [Identity(), 5, 0]") + expected = "TestClass [Identity(), 5, 0]" + self.assertEqual(test_module.__name__, expected) # type: ignore test_module = TestClass(torch.nn.Identity(), 5) - self.assertEqual(test_module.__name__, "TestClass [Identity(), 5]") + expected = "TestClass [Identity(), 5]" + self.assertEqual(test_module.__name__, expected) # type: ignore From caffe7c5b7292e7a5af1a15865f2bd6a1154563a Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 21 May 2022 17:53:37 -0600 Subject: [PATCH 017/174] Fix mypy tests --- tests/optim/core/test_loss.py | 84 +++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 47b421d629..222f4cf03b 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import operator import unittest -from typing import cast, Any, List, Optional, Type, Union +from typing import Any, List, Optional, Type import captum.optim._core.loss as opt_loss import torch @@ -16,13 +16,9 @@ def get_loss_value( model: torch.nn.Module, loss: opt_loss.Loss, input_shape: List[int] = [1, 3, 1, 1] -) -> Union[int, float, torch.Tensor]: +) -> torch.Tensor: module_outputs = collect_activations(model, loss.target, torch.ones(*input_shape)) - loss_value = 
loss(module_outputs) - try: - return loss_value.item() - except ValueError: - return loss_value.detach() + return loss(module_outputs).detach() class TestDeepDream(BaseTest): @@ -46,14 +42,14 @@ def test_channel_activation_0(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS, places=6 + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS, places=6 ) def test_channel_activation_1(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 1) self.assertAlmostEqual( - get_loss_value(model, loss), CHANNEL_ACTIVATION_1_LOSS, places=6 + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_1_LOSS, places=6 ) @@ -70,7 +66,7 @@ def test_neuron_activation_0(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.NeuronActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS, places=6 + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS, places=6 ) @@ -78,7 +74,7 @@ class TestTotalVariation(BaseTest): def test_total_variation(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.TotalVariation(model.layer) - self.assertAlmostEqual(get_loss_value(model, loss), 0.0) + self.assertAlmostEqual(get_loss_value(model, loss).item(), 0.0) class TestL1(BaseTest): @@ -91,7 +87,7 @@ def test_l1(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.L1(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS + CHANNEL_ACTIVATION_1_LOSS, places=6, ) @@ -108,7 +104,7 @@ def test_l2(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.L2(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), (CHANNEL_ACTIVATION_0_LOSS**2 + CHANNEL_ACTIVATION_1_LOSS**2) ** 0.5, places=5, ) @@ -119,7 
+115,7 @@ def test_diversity(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.Diversity(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]), + get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), -1, ) @@ -139,7 +135,7 @@ def test_activation_interpolation_0_1(self) -> None: channel_index2=1, ) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]), + get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), CHANNEL_ACTIVATION_0_LOSS + CHANNEL_ACTIVATION_1_LOSS, places=6, ) @@ -150,7 +146,7 @@ def test_alignment(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.Alignment(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]), 0.0 + get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), 0.0 ) @@ -169,7 +165,7 @@ def test_direction(self) -> None: loss = opt_loss.Direction(model.layer, vec=torch.ones(2)) b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) dot = torch.sum(vec.reshape((1, -1, 1, 1)) * b.reshape((1, -1, 1, 1)), 1) - self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) + self.assertAlmostEqual(get_loss_value(model, loss).item(), dot.item(), places=6) class TestNeuronDirection(BaseTest): @@ -190,7 +186,7 @@ def test_neuron_direction(self) -> None: loss = opt_loss.NeuronDirection(model.layer, vec=vec) b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) dot = torch.sum(b * vec) - self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) + self.assertAlmostEqual(get_loss_value(model, loss).item(), dot.item(), places=6) def test_neuron_direction_channel_index(self) -> None: model = BasicModel_ConvNet_Optim() @@ -199,7 +195,7 @@ def test_neuron_direction_channel_index(self) -> None: b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) dot = torch.sum(b * vec) - 
self.assertAlmostEqual(get_loss_value(model, loss), dot.item(), places=6) + self.assertAlmostEqual(get_loss_value(model, loss).item(), dot.item(), places=6) class TestAngledNeuronDirection(BaseTest): @@ -223,7 +219,7 @@ def test_angled_neuron_direction(self) -> None: loss = opt_loss.AngledNeuronDirection(model.layer, vec=vec, cossim_pow=0) b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) dot = torch.sum(b * vec).item() - output = torch.sum(cast(torch.Tensor, get_loss_value(model, loss))) + output = torch.sum(get_loss_value(model, loss)) self.assertAlmostEqual(output.item(), dot, places=6) def test_angled_neuron_direction_whitened(self) -> None: @@ -237,7 +233,7 @@ def test_angled_neuron_direction_whitened(self) -> None: ) b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) dot = torch.sum(vec * b).item() * 2 - output = torch.sum(cast(torch.Tensor, get_loss_value(model, loss))) + output = torch.sum(get_loss_value(model, loss)) self.assertAlmostEqual(output.item(), dot, places=6) def test_angled_neuron_direction_cossim_pow_4(self) -> None: @@ -255,7 +251,7 @@ def test_angled_neuron_direction_cossim_pow_4(self) -> None: cossims = dot / (1.0e-4 + torch.sqrt(torch.sum(a**2))) dot = dot * torch.clamp(cossims, min=0.1) ** cossim_pow - output = get_loss_value(model, loss) + output = get_loss_value(model, loss).item() self.assertAlmostEqual(output, dot.item(), places=6) @@ -273,7 +269,7 @@ def test_tensor_direction(self) -> None: loss = opt_loss.TensorDirection(model.layer, vec=vec) b = torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS]) dot = torch.sum(b[None, :, None, None] * vec).item() - self.assertAlmostEqual(get_loss_value(model, loss), dot, places=6) + self.assertAlmostEqual(get_loss_value(model, loss).item(), dot, places=6) class TestActivationWeights(BaseTest): @@ -394,7 +390,7 @@ def test_negative(self) -> None: model = BasicModel_ConvNet_Optim() loss = -opt_loss.ChannelActivation(model.layer, 
0) self.assertAlmostEqual( - get_loss_value(model, loss), -CHANNEL_ACTIVATION_0_LOSS, places=6 + get_loss_value(model, loss).item(), -CHANNEL_ACTIVATION_0_LOSS, places=6 ) def test_addition(self) -> None: @@ -405,7 +401,7 @@ def test_addition(self) -> None: + 1 ) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS + CHANNEL_ACTIVATION_1_LOSS + 1, places=6, ) @@ -414,7 +410,7 @@ def test_radd(self) -> None: model = BasicModel_ConvNet_Optim() loss = 1.0 + opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS + 1.0, places=6, ) @@ -427,15 +423,20 @@ def test_subtraction(self) -> None: - 1 ) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS - CHANNEL_ACTIVATION_1_LOSS - 1, ) def test_rsub(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping CompositeLoss rsub test due to insufficient Torch" + + " version." 
+ ) model = BasicModel_ConvNet_Optim() loss = 1.0 - opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), 1.0 - CHANNEL_ACTIVATION_0_LOSS, ) @@ -445,7 +446,7 @@ def test_multiplication_loss_type(self) -> None: model.layer, 1 ) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS * CHANNEL_ACTIVATION_0_LOSS, places=5, ) @@ -454,7 +455,7 @@ def test_multiplication(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) * 10 self.assertAlmostEqual( - get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS * 10, places=5 + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS * 10, places=5 ) def test_multiplication_error(self) -> None: @@ -466,7 +467,7 @@ def test_rmul(self) -> None: model = BasicModel_ConvNet_Optim() loss = 10 * opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), 10 * CHANNEL_ACTIVATION_0_LOSS, places=5 + get_loss_value(model, loss).item(), 10 * CHANNEL_ACTIVATION_0_LOSS, places=5 ) def test_rmul_error(self) -> None: @@ -480,7 +481,7 @@ def test_division_loss_type(self) -> None: model.layer, 1 ) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS / CHANNEL_ACTIVATION_0_LOSS, ) @@ -488,7 +489,7 @@ def test_division(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) / 10 self.assertAlmostEqual( - get_loss_value(model, loss), CHANNEL_ACTIVATION_0_LOSS / 10 + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS / 10 ) def test_division_error(self) -> None: @@ -568,6 +569,11 @@ def test_sum_loss_list_compose_add(self) -> None: class TestModuleOP(BaseTest): def test_module_op_loss_unary_op(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise 
unittest.SkipTest( + "Skipping ModuleOP unary op test due to insufficient Torch" + + " version." + ) model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) composed_loss = opt_loss.module_op(loss, None, operator.neg) @@ -579,6 +585,11 @@ def test_module_op_loss_unary_op(self) -> None: self.assertEqual(output, expected) def test_module_op_loss_num_add(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping ModuleOP loss add num test due to insufficient Torch" + + " version." + ) model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) composed_loss = opt_loss.module_op(loss, 1.0, operator.add) @@ -590,6 +601,11 @@ def test_module_op_loss_num_add(self) -> None: self.assertEqual(output, expected.item()) def test_module_op_loss_loss_add(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping ModuleOP Loss add Loss test due to insufficient Torch" + + " version." 
+ ) model = BasicModel_ConvNet_Optim() loss1 = opt_loss.ChannelActivation(model.layer, 0) loss2 = opt_loss.ChannelActivation(model.layer, 1) @@ -627,7 +643,7 @@ def test_module_op_loss_num_div(self) -> None: def test_rmodule_op_loss_pow_error(self) -> None: model = BasicModel_ConvNet_Optim() with self.assertRaises(TypeError): - loss = pt_loss.ChannelActivation(model.layer, 0) + loss = opt_loss.ChannelActivation(model.layer, 0) opt_loss.rmodule_op(loss, "string", operator.pow) # type: ignore From 973aacc0373e5427a73085265d0e631a605e2c6f Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 21 May 2022 18:00:07 -0600 Subject: [PATCH 018/174] Update test_loss.py --- tests/optim/core/test_loss.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 222f4cf03b..5ac5660a16 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -501,7 +501,7 @@ def test_rdiv(self) -> None: model = BasicModel_ConvNet_Optim() loss = 10.0 / opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), 10.0 / CHANNEL_ACTIVATION_0_LOSS, places=6, ) @@ -517,7 +517,7 @@ def test_pow_loss_type(self) -> None: model.layer, 1 ) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS**CHANNEL_ACTIVATION_0_LOSS, places=6, ) @@ -526,7 +526,7 @@ def test_pow(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.ChannelActivation(model.layer, 0) ** 2 self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS**2, places=6, ) @@ -540,7 +540,7 @@ def test_rpow(self) -> None: model = BasicModel_ConvNet_Optim() loss = 2.0 ** opt_loss.ChannelActivation(model.layer, 0) self.assertAlmostEqual( - get_loss_value(model, loss), + get_loss_value(model, loss).item(), 2.0**CHANNEL_ACTIVATION_0_LOSS, 
places=6, ) From 6e6f4e6fb7a902118ae8a5c8f9a2ca5d9e95506d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 22 May 2022 13:17:47 -0600 Subject: [PATCH 019/174] Add more tests --- tests/optim/core/test_loss.py | 177 ++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 5ac5660a16..5db47db850 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import operator import unittest -from typing import Any, List, Optional, Type +from typing import Any, List, Optional, Type, Union import captum.optim._core.loss as opt_loss import torch @@ -15,14 +15,20 @@ def get_loss_value( - model: torch.nn.Module, loss: opt_loss.Loss, input_shape: List[int] = [1, 3, 1, 1] + model: torch.nn.Module, + loss: opt_loss.Loss, + model_input: Union[List[int], torch.Tensor] = [1, 3, 1, 1], ) -> torch.Tensor: - module_outputs = collect_activations(model, loss.target, torch.ones(*input_shape)) + if isinstance(model_input, (list, tuple)): + model_input = torch.ones(*model_input) + else: + assert isinstance(model_input, torch.Tensor) + module_outputs = collect_activations(model, loss.target, model_input) return loss(module_outputs).detach() class TestDeepDream(BaseTest): - def test_channel_deepdream(self) -> None: + def test_deepdream(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.DeepDream(model.layer) expected = torch.as_tensor( @@ -30,6 +36,42 @@ def test_channel_deepdream(self) -> None: )[None, :] assertTensorAlmostEqual(self, get_loss_value(model, loss), expected, mode="max") + def test_deepdream_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.DeepDream(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, 
(batch_index, batch_index + 1)) + assertTensorAlmostEqual( + self, output, model_input[batch_index : batch_index + 1] ** 2, delta=0.0 + ) + + +class TestLayerActivation(BaseTest): + def test_layer_activation(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.LayerActivation(model.layer) + output = get_loss_value(model, loss) + expected = torch.as_tensor( + [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] + ) + + assertTensorAlmostEqual(self, output, expected[None, :, None, None], delta=0.0) + + def test_layer_activation_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.LayerActivation(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + assertTensorAlmostEqual( + self, output, model_input[batch_index : batch_index + 1], delta=0.0 + ) + class TestChannelActivation(BaseTest): def test_channel_activation_init(self) -> None: @@ -52,6 +94,24 @@ def test_channel_activation_1(self) -> None: get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_1_LOSS, places=6 ) + def test_channel_index_activation_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + channel_index = 2 + loss = opt_loss.ChannelActivation( + model, channel_index=channel_index, batch_index=batch_index + ) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + assertTensorAlmostEqual( + self, + output, + model_input[batch_index : batch_index + 1, channel_index], + delta=0.0, + ) + class TestNeuronActivation(BaseTest): def test_neuron_activation_init(self) -> None: @@ -69,6 +129,24 @@ def test_neuron_activation_0(self) -> None: get_loss_value(model, loss).item(), CHANNEL_ACTIVATION_0_LOSS, places=6 ) + def 
test_neuron_activation_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + channel_index = 2 + loss = opt_loss.NeuronActivation( + model, channel_index=channel_index, batch_index=batch_index + ) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + assertTensorAlmostEqual( + self, + output, + model_input[batch_index : batch_index + 1, channel_index, 2:3, 2:3], + delta=0.0, + ) + class TestTotalVariation(BaseTest): def test_total_variation(self) -> None: @@ -76,6 +154,16 @@ def test_total_variation(self) -> None: loss = opt_loss.TotalVariation(model.layer) self.assertAlmostEqual(get_loss_value(model, loss).item(), 0.0) + def test_total_variation_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.TotalVariation(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + self.assertEqual(output.item(), 360.0) + class TestL1(BaseTest): def test_l1_init(self) -> None: @@ -92,6 +180,16 @@ def test_l1(self) -> None: places=6, ) + def test_l1_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.L1(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + self.assertEqual(output.item(), 8400.0) + class TestL2(BaseTest): def test_l2_init(self) -> None: @@ -109,13 +207,23 @@ def test_l2(self) -> None: places=5, ) + def test_l2_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.L2(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 
5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + self.assertEqual(output.item(), 987.9017944335938) + class TestDiversity(BaseTest): def test_diversity(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.Diversity(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), + get_loss_value(model, loss, model_input=[2, 3, 1, 1]).item(), -1, ) @@ -135,7 +243,7 @@ def test_activation_interpolation_0_1(self) -> None: channel_index2=1, ) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), + get_loss_value(model, loss, model_input=[2, 3, 1, 1]).item(), CHANNEL_ACTIVATION_0_LOSS + CHANNEL_ACTIVATION_1_LOSS, places=6, ) @@ -146,7 +254,7 @@ def test_alignment(self) -> None: model = BasicModel_ConvNet_Optim() loss = opt_loss.Alignment(model.layer) self.assertAlmostEqual( - get_loss_value(model, loss, input_shape=[2, 3, 1, 1]).item(), 0.0 + get_loss_value(model, loss, model_input=[2, 3, 1, 1]).item(), 0.0 ) @@ -167,6 +275,29 @@ def test_direction(self) -> None: dot = torch.sum(vec.reshape((1, -1, 1, 1)) * b.reshape((1, -1, 1, 1)), 1) self.assertAlmostEqual(get_loss_value(model, loss).item(), dot.item(), places=6) + def test_direction_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([0, 1, 0]).float() + loss = opt_loss.Direction(model, vec=vec, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + + expected = torch.tensor( + [ + [ + [100.0, 101.0, 102.0, 103.0, 104.0], + [105.0, 106.0, 107.0, 108.0, 109.0], + [110.0, 111.0, 112.0, 113.0, 114.0], + [115.0, 116.0, 117.0, 118.0, 119.0], + [120.0, 121.0, 122.0, 123.0, 124.0], + ] + ] + ) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + assertTensorAlmostEqual(self, output, 
expected, delta=0.0) + class TestNeuronDirection(BaseTest): def test_neuron_direction_init(self) -> None: @@ -197,6 +328,17 @@ def test_neuron_direction_channel_index(self) -> None: dot = torch.sum(b * vec) self.assertAlmostEqual(get_loss_value(model, loss).item(), dot.item(), places=6) + def test_neuron_direction_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([0, 1, 0]).float() + loss = opt_loss.NeuronDirection(model, vec=vec, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + self.assertEqual(output.item(), 112.0) + class TestAngledNeuronDirection(BaseTest): def test_neuron_activation_init(self) -> None: @@ -254,6 +396,17 @@ def test_angled_neuron_direction_cossim_pow_4(self) -> None: output = get_loss_value(model, loss).item() self.assertAlmostEqual(output, dot.item(), places=6) + def test_angled_neuron_direction_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([1, 0, 1]).float() + loss = opt_loss.AngledNeuronDirection(model, vec=vec, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + self.assertEqual(output.item(), 1.5350958108901978) + class TestTensorDirection(BaseTest): def test_tensor_init(self) -> None: @@ -271,6 +424,16 @@ def test_tensor_direction(self) -> None: dot = torch.sum(b[None, :, None, None] * vec).item() self.assertAlmostEqual(get_loss_value(model, loss).item(), dot, places=6) + def test_tensor_direction_batch_index(self) -> None: + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([1, 0, 1, 0]).float().reshape((1, -1, 1, 1)) + loss = opt_loss.TensorDirection(model, vec=vec, 
batch_index=batch_index) + + model_input = torch.arange(0, 5 * 1 * 5 * 5).view(5, 1, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(output.item(), 74.0) + class TestActivationWeights(BaseTest): def test_neuron_activation_init(self) -> None: From 221c72b7a2bc9d0032b92148975ed37e4aa8e2db Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 22 May 2022 13:37:27 -0600 Subject: [PATCH 020/174] Fix weird value mismatch --- tests/optim/core/test_loss.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 5db47db850..097a8f7646 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -57,8 +57,13 @@ def test_layer_activation(self) -> None: expected = torch.as_tensor( [CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_1_LOSS] ) + expected = expected[None, :, None, None] - assertTensorAlmostEqual(self, output, expected[None, :, None, None], delta=0.0) + if version.parse(torch.__version__) <= version.parse("1.6.0"): + delta = 1.0e-5 + else: + delta = 0.0 + assertTensorAlmostEqual(self, output, expected, delta=delta) def test_layer_activation_batch_index(self) -> None: model = torch.nn.Identity() From 862ddce625ee419d4e0ca38f5f8791fc4ac517cf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 22 May 2022 14:00:11 -0600 Subject: [PATCH 021/174] Add batch_index tests to new objectives --- tests/optim/core/test_loss.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 39d8ef4ee1..6ae0105f55 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -222,6 +222,16 @@ def test_l2mean_channel_index(self) -> None: expected = (CHANNEL_ACTIVATION_0_LOSS - constant) ** 2 self.assertAlmostEqual(output, expected, places=6) + def test_l2mean_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR 
merged") + model = torch.nn.Identity() + batch_index = 1 + loss = opt_loss.L2Mean(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 4 * 5 * 5).view(5, 4, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(output.item(), 23034.25) + class TestVectorLoss(BaseTest): def test_vectorloss_init(self) -> None: @@ -246,6 +256,17 @@ def test_vectorloss_multiple_channels(self) -> None: output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) self.assertAlmostEqual(output, CHANNEL_ACTIVATION_1_LOSS * 2, places=6) + def test_vectorloss_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR merged") + model = torch.nn.Identity() + batch_index = 1 + vec = torch.tensor([0, 1, 0, 0]).float() + loss = opt_loss.VectorLoss(model, vec=vec, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 4 * 5 * 5).view(5, 4, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(output.item(), 137.0) + class TestFacetLoss(BaseTest): def test_facetloss_init(self) -> None: @@ -355,6 +376,27 @@ def test_facetloss_2d_weights(self) -> None: expected = (CHANNEL_ACTIVATION_0_LOSS * 2) * 1.5 self.assertAlmostEqual(output, expected / 10.0, places=6) + def test_facetloss_batch_index(self) -> None: + raise unittest.SkipTest("Remove after PR merged") + batch_index = 1 + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([0, 1, 0]).float() + facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + batch_index=batch_index, + ) + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertAlmostEqual(output.item(), 10.38000202178955, 
places=5) + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: From b196bbe3d0d1814163c490ada9cae1419acc4ada Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 24 May 2022 18:22:08 -0600 Subject: [PATCH 022/174] Miscellaneous Fixes * Wrap all remaining `torch.__version__` calls in `version.parse`. * Remove unused version check in `typing.py`. * Expose `MaxPool2dRelaxed` to users so that tutorials using it work. * Expose `dataset` module to users. * Fixed `show` & `save_tensor_as_image` docs. --- captum/optim/__init__.py | 2 ++ captum/optim/_param/image/images.py | 11 +++++----- captum/optim/_utils/image/common.py | 8 ++++---- captum/optim/_utils/image/dataset.py | 15 ++++++++++---- captum/optim/_utils/typing.py | 20 ++++++++----------- captum/optim/models/__init__.py | 2 ++ .../optim/models/test_googlenet_places365.py | 18 ++++++++--------- tests/optim/param/test_images.py | 2 +- tests/optim/param/test_transforms.py | 2 +- 9 files changed, 44 insertions(+), 36 deletions(-) diff --git a/captum/optim/__init__.py b/captum/optim/__init__.py index 9177d5c62c..828ac03dd2 100644 --- a/captum/optim/__init__.py +++ b/captum/optim/__init__.py @@ -7,6 +7,7 @@ from captum.optim._param.image.images import ImageTensor # noqa: F401 from captum.optim._utils import circuits, reducer # noqa: F401 from captum.optim._utils.image import atlas # noqa: F401 +from captum.optim._utils.image import dataset # noqa: F401 from captum.optim._utils.image.common import ( # noqa: F401 hue_to_rgb, make_grid_image, @@ -28,6 +29,7 @@ "reducer", "make_grid_image", "atlas", + "dataset", "hue_to_rgb", "nchannels_to_rgb", "save_tensor_as_image", diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index fa313b38af..3fade94f64 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -117,10 +117,11 @@ def show( grid image. Default is set to None for no grid image creation. 
Default: None padding (int, optional): The amount of padding between images in the grid - images. This parameter only has an effect if `nrow` is not None. + images. This parameter only has an effect if `images_per_row` is not + None. Default: 2 pad_value (float, optional): The value to use for the padding. This - parameter only has an effect if `nrow` is not None. + parameter only has an effect if `images_per_row` is not None. Default: 0.0 """ show( @@ -158,10 +159,10 @@ def export( grid image. Default is set to None for no grid image creation. Default: None padding (int, optional): The amount of padding between images in the grid - images. This parameter only has an effect if `nrow` is not None. - Default: 2 + images. This parameter only has an effect if `images_per_row` is not + None. pad_value (float, optional): The value to use for the padding. This - parameter only has an effect if `nrow` is not None. + parameter only has an effect if `images_per_row` is not None. Default: 0.0 """ save_tensor_as_image( diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index f1cdc5f477..39a6ada5ea 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -90,10 +90,10 @@ def show( grid image. Default is set to None for no grid image creation. Default: None padding (int, optional): The amount of padding between images in the grid - images. This parameter only has an effect if nrow is not None. + images. This parameter only has an effect if `images_per_row` is not None. Default: 2 pad_value (float, optional): The value to use for the padding. This parameter - only has an effect if nrow is not None. + only has an effect if `images_per_row` is not None. Default: 0.0 """ @@ -140,10 +140,10 @@ def save_tensor_as_image( grid image. Default is set to None for no grid image creation. Default: None padding (int, optional): The amount of padding between images in the grid - images. 
This parameter only has an effect if `nrow` is not None. + images. This parameter only has an effect if `images_per_row` is not None. Default: 2 pad_value (float, optional): The value to use for the padding. This parameter - only has an effect if `nrow` is not None. + only has an effect if `images_per_row` is not None. Default: 0.0 """ diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index c894173990..66bf18b53a 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -1,6 +1,7 @@ from typing import cast import torch +from packaging import version try: from tqdm.auto import tqdm @@ -73,6 +74,15 @@ def dataset_cov_matrix( return cov_mtx +# Handle older versions of PyTorch +# Defined outside of function in order to support JIT +_torch_norm = ( + torch.linalg.norm + if version.parse(torch.__version__) >= version.parse("1.7.0") + else torch.norm +) + + def cov_matrix_to_klt( cov_mtx: torch.Tensor, normalize: bool = False, epsilon: float = 1e-10 ) -> torch.Tensor: @@ -90,13 +100,10 @@ def cov_matrix_to_klt( *tensor*: A KLT matrix for the specified covariance matrix. 
""" - # Handle older versions of PyTorch - torch_norm = torch.linalg.norm if torch.__version__ >= "1.9.0" else torch.norm - U, S, V = torch.svd(cov_mtx) svd_sqrt = U @ torch.diag(torch.sqrt(S + epsilon)) if normalize: - svd_sqrt / torch.max(torch_norm(svd_sqrt, dim=0)) + svd_sqrt / torch.max(_torch_norm(svd_sqrt, dim=0)) return svd_sqrt diff --git a/captum/optim/_utils/typing.py b/captum/optim/_utils/typing.py index a0e3d6f1c0..10d37bd835 100755 --- a/captum/optim/_utils/typing.py +++ b/captum/optim/_utils/typing.py @@ -1,7 +1,8 @@ import sys from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union -from torch import Tensor, __version__ +from torch import Tensor +from torch import distributions from torch.nn import Module from torch.optim import Optimizer @@ -33,16 +34,11 @@ def cleanup(self) -> None: LossFunction = Callable[[ModuleOutputMapping], Tensor] SingleTargetLossFunction = Callable[[Tensor], Tensor] -if __version__ < "1.4.0": - NumSeqOrTensorOrProbDistType = Union[Sequence[int], Sequence[float], Tensor] -else: - from torch import distributions - - NumSeqOrTensorOrProbDistType = Union[ - Sequence[int], - Sequence[float], - Tensor, - distributions.distribution.Distribution, - ] +NumSeqOrTensorOrProbDistType = Union[ + Sequence[int], + Sequence[float], + Tensor, + distributions.distribution.Distribution, +] IntSeqOrIntType = Union[List[int], Tuple[int], Tuple[int, int], int] TupleOfTensorsOrTensorType = Union[Tuple[Tensor, ...], Tensor] diff --git a/captum/optim/models/__init__.py b/captum/optim/models/__init__.py index 0f809d5ef5..687aab0f85 100755 --- a/captum/optim/models/__init__.py +++ b/captum/optim/models/__init__.py @@ -1,4 +1,5 @@ from ._common import ( # noqa: F401 + MaxPool2dRelaxed, RedirectedReluLayer, SkipLayer, collect_activations, @@ -17,6 +18,7 @@ ) __all__ = [ + "MaxPool2dRelaxed", "RedirectedReluLayer", "SkipLayer", "collect_activations", diff --git a/tests/optim/models/test_googlenet_places365.py 
b/tests/optim/models/test_googlenet_places365.py index d6e9cf321d..84f9291fb9 100644 --- a/tests/optim/models/test_googlenet_places365.py +++ b/tests/optim/models/test_googlenet_places365.py @@ -11,7 +11,7 @@ class TestInceptionV1Places365(BaseTest): def test_load_inceptionv1_places365_with_redirected_relu(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained InceptionV1 Places365 due to insufficient" + " Torch version." @@ -22,7 +22,7 @@ def test_load_inceptionv1_places365_with_redirected_relu(self) -> None: self.assertTrue(check_layer_in_model(model, RedirectedReluLayer)) def test_load_inceptionv1_places365_no_redirected_relu(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained InceptionV1 Places365 RedirectedRelu test" + " due to insufficient Torch version." @@ -34,7 +34,7 @@ def test_load_inceptionv1_places365_no_redirected_relu(self) -> None: self.assertTrue(check_layer_in_model(model, torch.nn.ReLU)) def test_load_inceptionv1_places365_linear(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping load pretrained InceptionV1 Places365 linear test due to" + " insufficient Torch version." @@ -47,7 +47,7 @@ def test_load_inceptionv1_places365_linear(self) -> None: self.assertTrue(check_layer_in_model(model, torch.nn.AvgPool2d)) def test_inceptionv1_places365_transform(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping InceptionV1 Places365 internal transform test due to" + " insufficient Torch version." 
@@ -62,7 +62,7 @@ def test_inceptionv1_places365_transform(self) -> None: assertTensorAlmostEqual(self, output, expected_output, 0) def test_inceptionv1_places365_transform_warning(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping InceptionV1 Places365 internal transform warning test due" + " to insufficient Torch version." @@ -75,7 +75,7 @@ def test_inceptionv1_places365_transform_warning(self) -> None: model._transform_input(x) def test_inceptionv1_places365_load_and_forward(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping basic pretrained InceptionV1 Places365 forward test due to" + " insufficient Torch version." @@ -86,7 +86,7 @@ def test_inceptionv1_places365_load_and_forward(self) -> None: self.assertEqual([list(o.shape) for o in outputs], [[1, 365]] * 3) def test_inceptionv1_places365_load_and_forward_diff_sizes(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping pretrained InceptionV1 Places365 forward with different" + " sized inputs test due to insufficient Torch version." @@ -102,7 +102,7 @@ def test_inceptionv1_places365_load_and_forward_diff_sizes(self) -> None: self.assertEqual([list(o.shape) for o in outputs2], [[1, 365]] * 3) def test_inceptionv1_places365_forward_no_aux(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping pretrained InceptionV1 Places365 with aux logits forward" + " test due to insufficient Torch version." 
@@ -113,7 +113,7 @@ def test_inceptionv1_places365_forward_no_aux(self) -> None: self.assertEqual(list(outputs.shape), [1, 365]) def test_inceptionv1_places365_forward_cuda(self) -> None: - if torch.__version__ <= "1.6.0": + if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping pretrained InceptionV1 Places365 forward CUDA test due to" + " insufficient Torch version." diff --git a/tests/optim/param/test_images.py b/tests/optim/param/test_images.py index 617d34a3a3..0ca59c1920 100644 --- a/tests/optim/param/test_images.py +++ b/tests/optim/param/test_images.py @@ -443,7 +443,7 @@ def test_simple_tensor_parameterization_with_grad(self) -> None: self.assertTrue(image_param.tensor.requires_grad) def test_simple_tensor_parameterization_jit_module_with_grad(self) -> None: - if torch.__version__ <= "1.8.0": + if version.parse(torch.__version__) <= version.parse("1.8.0"): raise unittest.SkipTest( "Skipping SimpleTensorParameterization JIT module test due to" + " insufficient Torch version." diff --git a/tests/optim/param/test_transforms.py b/tests/optim/param/test_transforms.py index 385006a7ac..362fce9649 100644 --- a/tests/optim/param/test_transforms.py +++ b/tests/optim/param/test_transforms.py @@ -1335,7 +1335,7 @@ def test_ignore_alpha(self) -> None: assert rgb_tensor.size(1) == 3 def test_ignore_alpha_jit_module(self) -> None: - if torch.__version__ <= "1.8.0": + if version.parse(torch.__version__) <= version.parse("1.8.0"): raise unittest.SkipTest( "Skipping IgnoreAlpha JIT module test due to insufficient" + " Torch version." 
From c65665865947cbd2326ac44592fd0c54746ced15 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 25 May 2022 13:21:52 -0600 Subject: [PATCH 023/174] Add Model Preparation Tutorial --- ...ingStarted_ModelPreparation_OptimViz.ipynb | 469 ++++++++++++++++++ 1 file changed, 469 insertions(+) create mode 100644 tutorials/optimviz/GettingStarted_ModelPreparation_OptimViz.ipynb diff --git a/tutorials/optimviz/GettingStarted_ModelPreparation_OptimViz.ipynb b/tutorials/optimviz/GettingStarted_ModelPreparation_OptimViz.ipynb new file mode 100644 index 0000000000..ea83ff0146 --- /dev/null +++ b/tutorials/optimviz/GettingStarted_ModelPreparation_OptimViz.ipynb @@ -0,0 +1,469 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "GettingStarted_ModelPreparation_OptimViz.ipynb", + "provenance": [], + "collapsed_sections": [ + "3MSB2RhA4h8E" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Preparing Models For Captum's Optim Module\n", + "\n", + "While most models will work out of the box with the Optim module, some models may require a few minor changes for full compatibility. This tutorial demonstrates how to easily perform the suggested & required changes to models for use with the Optim module." 
+ ], + "metadata": { + "id": "QVpft54KA-P_" + } + }, + { + "cell_type": "code", + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import captum.optim as opt\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")" + ], + "metadata": { + "id": "KD5InqKt3Hjc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Model Layer Changes\n", + "\n", + "The Optim module's layer-related functions and optimization systems rely on layers being defined as `nn.Module` classes rather than functional layers. Specifically, Optim's loss optimization and activation collection rely on PyTorch's hook system via [`register_forward_hook`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=register_forward_hook#torch.nn.Module.register_forward_hook), and functional layers do not support hooks.\n", + "Other functions like `replace_layers` can only detect `nn.Module` objects inside models.\n", + "\n", + "\n", + "For the purpose of this tutorial, our main toy model does not use any functional layers. However, if you wish to use your own model, then you should verify that all applicable functional layers have been changed to their `nn.Module` equivalents in your chosen model.\n", + "\n", + "* A list of all PyTorch's `torch.nn.functional` layers can be found [here](https://pytorch.org/docs/stable/nn.functional.html), and each layer has a link to its `nn.Module` equivalent.\n", + "\n", + "* The most common change that you will likely encounter is converting the functional [`F.relu`](https://pytorch.org/docs/stable/generated/torch.nn.functional.relu.html#torch.nn.functional.relu) layers to [`nn.ReLU`](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html)." 
+ ], + "metadata": { + "id": "3MSB2RhA4h8E" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Tutorial Setup\n", + "\n", + "Below we define a simple toy model and a functional version of the toy model for usage in our examples." + ], + "metadata": { + "id": "QGIfQki3Dn2M" + } + }, + { + "cell_type": "code", + "source": [ + "class ToyModel(torch.nn.Module):\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + " self.basic_module = torch.nn.Sequential(\n", + " torch.nn.Conv2d(3, 4, kernel_size=3, stride=2),\n", + " torch.nn.ReLU(),\n", + " torch.nn.MaxPool2d(kernel_size=3, stride=2),\n", + " )\n", + " self.conv = torch.nn.Conv2d(4, 4, kernel_size=3, stride=2)\n", + " self.bn = torch.nn.BatchNorm2d(4)\n", + " self.relu = torch.nn.ReLU()\n", + " self.pooling = torch.nn.AdaptiveAvgPool2d((2, 2))\n", + " self.linear = torch.nn.Linear(16, 4)\n", + "\n", + " def forward(self, x: torch.Tensor) -> torch.Tensor:\n", + " x = self.basic_module(x)\n", + " x = self.conv(x)\n", + " x = self.bn(x)\n", + " x = self.relu(x)\n", + " x = self.pooling(x)\n", + " x = x.flatten()\n", + " x = self.linear(x)\n", + " return x\n", + "\n", + "\n", + "class ToyModelFunctional(torch.nn.Module):\n", + " \"\"\"Functional layer only version of our toy model\"\"\"\n", + "\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + "\n", + " def forward(self, x: torch.Tensor) -> torch.Tensor:\n", + " x = F.conv2d(x, weight=torch.ones([4, 3, 3, 3]), stride=2)\n", + " x = F.relu(x)\n", + " x = F.max_pool2d(x, kernel_size=3, stride=2)\n", + "\n", + " x = F.conv2d(x, weight=torch.ones([4, 4, 3, 3]), stride=2)\n", + " x = F.batch_norm(x, running_mean=torch.ones([4]), running_var=torch.ones([4]))\n", + " x = F.relu(x)\n", + " x = F.adaptive_avg_pool2d(x, (2, 2))\n", + " x = x.flatten()\n", + " x = F.linear(x, weight=torch.ones([4, 16]))\n", + " return x" + ], + "metadata": { + "id": "X79d0fh_3LuT" + }, + "execution_count": null, +
"outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## The Basics: Targetable Layers\n", + "\n", + "The optim module's `opt.models.collect_activations` function and loss objectives (`opt.loss.`) rely on forward hooks using PyTorch's hook system. This means that functional layers cannot be used as optimization targets, and activations cannot be collected for them.\n", + "\n", + "Models can easily be checked for compatible layers via the `opt.models.get_model_layers` function as we'll see below." + ], + "metadata": { + "id": "UjEdNgauOdbZ" + } + }, + { + "cell_type": "code", + "source": [ + "# Functional version of the toy model with no nn.Module layers\n", + "toy_model_functional = ToyModelFunctional().eval().to(device)\n", + "\n", + "# Get hookable layers\n", + "possible_targets = opt.models.get_model_layers(toy_model_functional)\n", + "\n", + "print(\"Possible targets:\", possible_targets)" + ], + "metadata": { + "id": "uEPS3SOqcl47", + "outputId": "fe01c649-97e2-4565-db99-96ced48ce15b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Possible targets: []\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "As you can see, no layers capable of being hooked were found in our functional layer model.\n", + "\n", + "Below we use the `opt.models.get_model_layers` function to see a list of all the hookable layers in our non-functional model that we can use as targets." 
+ ], + "metadata": { + "id": "46YGHAeRdBmE" + } + }, + { + "cell_type": "code", + "source": [ + "# Toy model with only nn.Module layers\n", + "target_model = ToyModel().eval().to(device)\n", + "\n", + "# Get hookable layers\n", + "possible_targets = opt.models.get_model_layers(target_model)\n", + "\n", + "# Display hookable layers\n", + "print(\"Possible targets:\")\n", + "for t in possible_targets:\n", + " print(\" target_model.\" + t)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TlZ5UwiVPptG", + "outputId": "169fb32f-3648-444c-b89b-db9f5cf9121a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Possible targets:\n", + " target_model.basic_module\n", + " target_model.basic_module[0]\n", + " target_model.basic_module[1]\n", + " target_model.basic_module[2]\n", + " target_model.conv\n", + " target_model.bn\n", + " target_model.relu\n", + " target_model.pooling\n", + " target_model.linear\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We can then easily use any of the targets found above for optimization and activation collection, as we show below." 
+ ], + "metadata": { + "id": "iHTSN71dWh5o" + } + }, + { + "cell_type": "code", + "source": [ + "target_model = ToyModel().eval().to(device)\n", + "\n", + "# Set layer target\n", + "target_layer = target_model.conv\n", + "\n", + "# Collect activations from target\n", + "activations_dict = opt.models.collect_activations(\n", + " model=target_model, targets=target_layer\n", + ")\n", + "\n", + "# Collect target from activations dict\n", + "activations = activations_dict[target_layer]\n", + "\n", + "# Display activation shape\n", + "print(\"Output shape of the {} layer activations:\".format(type(target_layer)))\n", + "print(\" {} \\n\".format(activations.shape))\n", + "\n", + "# We can also use the target for loss objectives\n", + "loss_fn = opt.loss.LayerActivation(target=target_layer)\n", + "\n", + "# Print loss objective\n", + "print(\"Loss objective:\", loss_fn)\n", + "print(\" target:\", loss_fn.target)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "tiD7qBzlQ6Zw", + "outputId": "674df320-9fb4-46aa-8bf2-1acd534a7a61" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Output shape of the layer activations:\n", + " torch.Size([1, 4, 27, 27]) \n", + "\n", + "Loss objective: LayerActivation []\n", + " target: Conv2d(4, 4, kernel_size=(3, 3), stride=(2, 2))\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Visualization: Redirected ReLU\n", + "\n", + "In some cases, the target of interest may not be activated at all by the initial random input. If this happens, the zero derivative stops the gradient from flowing backwards and thus we never move towards any meaningful visualization. To solve this problem, we can replace the ReLU layers in a model with a special version of ReLU called `RedirectedReLU`. 
The `RedirectedReLU` layer allows the gradient to flow temporarily in these zero-gradient situations.\n", + "\n", + "Below we use the `opt.models.replace_layers` function to replace all instances of `nn.ReLU` in our toy model with `opt.models.RedirectedReluLayer`." + ], + "metadata": { + "id": "MlGvyhd0AalX" + } + }, + { + "cell_type": "code", + "source": [ + "relu_model = ToyModel().eval().to(device)\n", + "\n", + "# Replace the ReLU with RedirectedReluLayer\n", + "opt.models.replace_layers(\n", + " relu_model, layer1=torch.nn.ReLU, layer2=opt.models.RedirectedReluLayer\n", + ")\n", + "\n", + "# Show modified model\n", + "print(relu_model)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4w34RcZU_DrU", + "outputId": "596aef9f-26d8-4e87-fdaf-71211e29699b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ToyModel(\n", + " (basic_module): Sequential(\n", + " (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2))\n", + " (1): RedirectedReluLayer()\n", + " (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " )\n", + " (conv): Conv2d(4, 4, kernel_size=(3, 3), stride=(2, 2))\n", + " (bn): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): RedirectedReluLayer()\n", + " (pooling): AdaptiveAvgPool2d(output_size=(2, 2))\n", + " (linear): Linear(in_features=16, out_features=4, bias=True)\n", + ")\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Circuits: Linear Operation Layers\n", + "\n", + "Certain functions like `opt.circuits.extract_expanded_weights` require using modules that only perform linear operations. This can become slightly more complicated when dealing with layers that have multiple preset variables. 
Luckily the `opt.models.replace_layers` function can easily handle these variable transfers for layer types like pooling layers if the `transfer_vars` variable is set to `True`.\n", + "\n", + "\n", + "Common linear layer replacements are as follows:\n", + "\n", + "* `nn.ReLU` layers need to be skipped, which can be done by replacing them with either `nn.Identity` or Captum's `SkipLayer` layer.\n", + "\n", + "* `nn.MaxPool2d` layers need to be converted to their linear `nn.AvgPool2d` layer equivalents.\n", + "\n", + "* `nn.AdaptiveMaxPool2d` layers need to be converted to their linear `nn.AdaptiveAvgPool2d` layer equivalents.\n", + "\n", + "Some of the layers which are already linear operations are:\n", + "\n", + "* `nn.BatchNorm2d` is linear when it's in evaluation mode (`.eval()`).\n", + "* `nn.Conv2d` is linear.\n", + "* `nn.Linear` is linear." + ], + "metadata": { + "id": "KJVG3KDC31dy" + } + }, + { + "cell_type": "code", + "source": [ + "linear_only_model = ToyModel().eval().to(device)\n", + "\n", + "# Replace MaxPool2d with AvgPool2d using the same settings\n", + "opt.models.replace_layers(\n", + " linear_only_model,\n", + " layer1=torch.nn.MaxPool2d,\n", + " layer2=torch.nn.AvgPool2d,\n", + " transfer_vars=True, # Use same MaxPool2d parameters for AvgPool2d\n", + ")\n", + "\n", + "# Replace ReLU with Identity\n", + "opt.models.replace_layers(\n", + " linear_only_model, layer1=torch.nn.ReLU, layer2=torch.nn.Identity\n", + ")\n", + "\n", + "# Show modified model\n", + "print(linear_only_model)" + ], + "metadata": { + "id": "hYbm5Cg34She", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a35a33e2-04c3-4563-b139-ab28127b4f90" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ToyModel(\n", + " (basic_module): Sequential(\n", + " (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2))\n", + " (1): Identity()\n", + " (2): AvgPool2d(kernel_size=3, stride=2, padding=0)\n", + " )\n", + 
" (conv): Conv2d(4, 4, kernel_size=(3, 3), stride=(2, 2))\n", + " (bn): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (relu): Identity()\n", + " (pooling): AdaptiveAvgPool2d(output_size=(2, 2))\n", + " (linear): Linear(in_features=16, out_features=4, bias=True)\n", + ")\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Other: Relaxed Pooling\n", + "\n", + "Some attribution based operations like those used in activation atlas sample collection, require replacing the `nn.MaxPool2d` layers with a special relaxed version called `MaxPool2dRelaxed`. This is also extremely easy to do with the `replace_layers` function like we did above." + ], + "metadata": { + "id": "MXXUIcEBk7_k" + } + }, + { + "cell_type": "code", + "source": [ + "relaxed_pooling_model = ToyModel().eval().to(device).basic_module\n", + "\n", + "# Replace MaxPool2d with MaxPool2dRelaxed\n", + "opt.models.replace_layers(\n", + " relaxed_pooling_model,\n", + " torch.nn.MaxPool2d,\n", + " opt.models.MaxPool2dRelaxed,\n", + " transfer_vars=True, # Use same MaxPool2d parameters for MaxPool2dRelaxed\n", + ")\n", + "\n", + "# Show modified model\n", + "print(relaxed_pooling_model)" + ], + "metadata": { + "id": "fWjY33RKkFi8", + "outputId": "f0e0a0d9-fd1f-4857-ea60-e8a2127607fd", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Sequential(\n", + " (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2))\n", + " (1): ReLU()\n", + " (2): MaxPool2dRelaxed(\n", + " (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (avgpool): AvgPool2d(kernel_size=3, stride=2, padding=0)\n", + " )\n", + ")\n" + ] + } + ] + } + ] +} \ No newline at end of file From 0e7d0f45c2fac21a6a793147f68dfd1a5f9f7eea Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 25 May 2022 14:47:42 -0600 Subject: [PATCH 024/174] Improve 
vector function --- captum/optim/_utils/image/common.py | 14 ++++++++------ tests/optim/utils/image/test_common.py | 13 +++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 31af3169ef..1f2cced14f 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -385,7 +385,7 @@ def _create_new_vector( Args: x (torch.Tensor): A set of 2d or 4d activations. - vec (torch.Tensor): A direction vector to use, with a compatible shape for + vec (torch.Tensor): A 1D direction vector to use, with a compatible shape for computing the matrix product of the activations. See torch.matmul for See torch.matmul for more details on compatible shapes: https://pytorch.org/docs/stable/generated/torch.matmul.html @@ -405,12 +405,14 @@ def _create_new_vector( stored vector. """ assert x.device == vec.device - assert x.dim() > 1 + assert x.dim() > 1 and vec.dim() == 1 if activation_fn: x = activation_fn(x) - if x.dim() > 2 and move_channel_dim_to_final_dim: - permute_vals = [0] + list(range(x.dim()))[2:] + [1] - x = x.permute(*permute_vals) - return torch.mean(x @ vec, [1, 2]) + if x.dim() > 2: + if move_channel_dim_to_final_dim: + permute_vals = [0] + list(range(x.dim()))[2:] + [1] + x = x.permute(*permute_vals) + mean_vals = list(range(1, x.dim() - 1)) + return torch.mean(x @ vec, mean_vals) else: return (x @ vec)[:, None] diff --git a/tests/optim/utils/image/test_common.py b/tests/optim/utils/image/test_common.py index fcece26683..09e1a7355c 100644 --- a/tests/optim/utils/image/test_common.py +++ b/tests/optim/utils/image/test_common.py @@ -550,3 +550,16 @@ def test_create_new_vector_no_activation_fn(self) -> None: vec = torch.tensor([1, 1, 1]).float() out = common._create_new_vector(x, vec, activation_fn=None) self.assertEqual(out.item(), 0.0) + + def test_create_new_vector_channels_last(self) -> None: + x = torch.arange(0, 4 * 5 * 5 * 3).view(4, 5, 5, 
3).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec, move_channel_dim_to_final_dim=False) + self.assertEqual(out.tolist(), [37.0, 112.0, 187.0, 262.0]) + + def test_create_new_vector_dim_2(self) -> None: + x = torch.arange(0, 1 * 3).view(1, 3).float() + vec = torch.tensor([0, 1, 0]).float() + out = common._create_new_vector(x, vec) + self.assertEqual(list(out.shape), [1, 1]) + self.assertEqual(out.item(), 1.0) From 3b67bb047723497ae18afaf99bbf9e5dc67d55ba Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 11:12:48 -0600 Subject: [PATCH 025/174] Improve the `FacetLoss` objective * Improve efficiency of the `FacetLoss` objective. --- captum/optim/_core/loss.py | 42 ++++++++++++-------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index cd52f02951..731eeb2346 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1005,36 +1005,12 @@ def __init__( assert facet_weights.dim() == 4 or facet_weights.dim() == 2 self.facet_weights = facet_weights - def _get_strength(self, batch: int, device: torch.device) -> torch.Tensor: - """ - Calculate batch weighting. - - Args: - - batch (int): The size of the batch dimension to use. - device (torch.device): The device to use. - - Returns: - strength_t (torch.Tensor): A tensor containing the weights to multiply the - different batch dimensions by. 
- """ - if isinstance(self.strength, (tuple, list)): - strength_t = torch.linspace( - self.strength[0], - self.strength[1], - steps=batch, - device=device, - ) - else: - strength_t = torch.ones([1], device=device) * self.strength - return strength_t[:, None, None, None] - def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations_ultimate = targets_to_values[self.ultimate_target] - activations_ultimate = activations_ultimate - new_vec = _create_new_vector(activations_ultimate, self.vec)[ + activations_ultimate = activations_ultimate[ self.batch_index[0] : self.batch_index[1] ] + new_vec = _create_new_vector(activations_ultimate, self.vec) target_activations = targets_to_values[self.layer_target] layer_grad = torch.autograd.grad( @@ -1042,15 +1018,23 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: inputs=target_activations, grad_outputs=torch.ones_like(new_vec), retain_graph=True, - )[0] + )[0].detach()[self.batch_index[0] : self.batch_index[1]] layer = target_activations[self.batch_index[0] : self.batch_index[1]] - flat_attr = layer * torch.nn.functional.relu(layer_grad.detach()) + flat_attr = layer * torch.nn.functional.relu(layer_grad) if self.facet_weights.dim() == 2 and flat_attr.dim() == 4: flat_attr = torch.sum(flat_attr, dim=(2, 3)) if self.strength: - strength_t = self._get_strength(new_vec.shape[0], flat_attr.device) + if isinstance(self.strength, (tuple, list)): + strength_t = torch.linspace( + self.strength[0], + self.strength[1], + steps=flat_attr.shape[0], + device=flat_attr.device, + ).reshape(flat_attr.shape[0], *[1] * (flat_attr.dim() - 1)) + else: + strength_t = self.strength flat_attr = strength_t * flat_attr return torch.sum(flat_attr * self.facet_weights) From 4c51ef1c1f1ead191b370f589c61e478096d612f Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 12:21:19 -0600 Subject: [PATCH 026/174] Add CLIP objectives to `__all__` --- captum/optim/_core/loss.py | 3 +++ 1 file 
changed, 3 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 731eeb2346..01894ae078 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1110,6 +1110,9 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: "AngledNeuronDirection", "TensorDirection", "ActivationWeights", + "L2Mean", + "VectorLoss", + "FacetLoss", "sum_loss_list", "default_loss_summarize", ] From 36df47e363ae7d5297b05af6c34d51646d7cb02b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 12:28:14 -0600 Subject: [PATCH 027/174] Separate some loss tests --- tests/optim/core/test_loss.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 097a8f7646..97f4c78ed1 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -718,6 +718,8 @@ def test_rpow_error(self) -> None: with self.assertRaises(TypeError): "string" ** opt_loss.ChannelActivation(model.layer, 0) # type: ignore + +class TestSumLossList(BaseTest): def test_sum_loss_list(self) -> None: n_batch = 400 model = torch.nn.Identity() From 31cb2a903330cb87e5dfbb76871a0138606d6a7e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 28 May 2022 15:17:51 -0600 Subject: [PATCH 028/174] Fix mistake in FacetLoss docs --- captum/optim/_core/loss.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 01894ae078..6542e828b7 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -980,15 +980,16 @@ def __init__( the model. layer_target (nn.Module): A layer that we have facet_weights for. This target layer should be below the ultimate_target layer in the model. + facet_weights (torch.Tensor): Weighting that steers the objective + towards a particular theme or concept. These weight values should + come from linear probes trained on layer_target. 
strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for torch.linspace to calculate the step values in between. Default is set to None for no weighting. - facet_weights (torch.Tensor): Weighting that steers the objective - towards a particular theme or concept. These weight values should - come from linear probes trained on layer_target. + Default: None batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. If set to None, defaults to all activations in the batch. From cfa9d9f60f2997b007a9ed801b659ffd6fd54271 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 31 May 2022 12:35:19 -0600 Subject: [PATCH 029/174] Update CustomModules tutorial for new changes --- .../optimviz/CustomModules_OptimViz.ipynb | 1459 ++++++++++------- 1 file changed, 897 insertions(+), 562 deletions(-) diff --git a/tutorials/optimviz/CustomModules_OptimViz.ipynb b/tutorials/optimviz/CustomModules_OptimViz.ipynb index 22d88fde12..ae556a1b0b 100644 --- a/tutorials/optimviz/CustomModules_OptimViz.ipynb +++ b/tutorials/optimviz/CustomModules_OptimViz.ipynb @@ -1,579 +1,914 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "2ylZPub2JTMH" - }, - "source": [ - "# Creating Custom Captum.optim Modules\n", - "Captum's Optim library contains an extensive list of optimization objectives, transforms, and input parameterizations. However, some cases may require adding new features to these areas of Captum's Optim library. Luckily adding them to Captum is easy!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "GWrStkUVEbOC" - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from typing import Dict, List, Optional, Tuple, Union\n", - "\n", - "import torch\n", - "import torchvision\n", - "from captum.optim.models import googlenet\n", - "\n", - "import captum.optim as opt\n", - "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "\n", - "model = googlenet(pretrained=True).to(device)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DffA7pFSFZY0" - }, - "source": [ - "## Custom Image Transforms\n", - "\n", - "If both Captum and Torchvision lack the transforms that you require, then you can create your own custom transforms.\n", - "\n", - "Custom image transform classes must contain a `forward()` function. The first transform in a list of transforms takes an input tensor with a shape of (B, C, W, H), and the final transform in a list of transforms will need to output a tensor with the same shape of (B, C, W, H). 
Captum and Torchvision's transforms normally expect and output a shape of (B, C, W, H).\n", - "\n", - "An optional `__init__()` function can be used as well.\n", - "\n", - "\n", - "Note that all custom transforms need to be autograd compatible, so that the gradient is not interrupted during the optimization process.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "hoyneR7FFTXK" - }, - "outputs": [], - "source": [ - "class CustomTransform(torch.nn.Module):\n", - " def __init__(self, val: int = 1) -> None:\n", - " super(CustomTransform, self).__init__()\n", - " self.val = val\n", - "\n", - " def forward(self, input: torch.Tensor) -> torch.Tensor:\n", - " return input * self.val" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2kjc9istEzVz" - }, - "source": [ - "## Custom Loss Functions\n", - "Captum's loss functions are composed of classes that the optimization function uses. Custom loss classes should inherit the base loss class `opt.loss.BaseLoss` and also have the `opt.loss.loss_wrapper` decorator.\n", - "\n", - "For now, the `opt.loss.loss_wrapper` decorator primarily serves to update the name and string representations of the loss function, but future work may also add other generic loss attributes via the decorator.\n", - "\n", - "Custom loss functions must contain the following two functions:\n", - "\n", - "\n", - "* The `__init__()` function must at least contain a `target` variable. The `target` variable should be an `nn.module` or list of `nn.modules` to collect activations from. Other variables can be added after the `target`. An optional variable is `batch_index`, which is an `int`. The `batch_index` is used to target a specific image in a batch of input images.\n", - "\n", - "* The `__call__()` function takes activations from the target layer and then returns a loss value. Activations sent to the call function are extracted from a dictionary with the target as the key." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "LQZECwPoEdET" - }, - "outputs": [], - "source": [ - "@opt.loss.loss_wrapper\n", - "class CustomLoss(opt.loss.BaseLoss):\n", - " def __init__(self, target: Union[torch.nn.Module, List[torch.nn.Module]], batch_index: Optional[int] = None) -> \"CustomLoss\":\n", - " opt.loss.BaseLoss.__init__(self, target, batch_index)\n", - "\n", - " def __call__(\n", - " self, target_activations: Dict[torch.nn.Module, Optional[torch.Tensor]]\n", - " ) -> torch.Tensor:\n", - " # Get activations from target\n", - " # self.batch_index is a tuple of (batch_index, batch_index+1)\n", - " activations = target_activations[self.target][self.batch_index[0]:self.batch_index[1]]\n", - " return activations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Custom Loss Summarize Functions\n", - "\n", - "In addition to the loss function, there is also the `loss_summarize_fn` that can be supplied to the `optimize` method of `InputOptimization`. This function dictates how the final loss is computed and aggregated before we call the `backward` method on it to compute gradients.\n", - "\n", - "Here we show the default summarize function to give an idea of what this function does. The default summarize function simply computes the mean of the loss tensor and multiplies it by -1 so that the optimization maximizes the activations." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def custom_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor:\n", - " return -1 * loss_value.mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K45Xg0HGF3VH" - }, - "source": [ - "## Custom Image Parameterization\n", - "\n", - "\n", - "The image parameters that Captum's Optim library optimizes to produce visualizations is stored in a custom image parameterization class. 
\n", - "\n", - "Custom parameterization must contain the following two functions:\n", - "\n", - "### Init function\n", - "\n", - "The `__init__()` function has 3 input variables:\n", - "\n", - "* size (tuple, int): dimensions in the form height, width. \n", - "\n", - "* channels (int): the number of channels for the output tensor.\n", - "\n", - "* batch (int): the desired batch size to use.\n", - "\n", - "* init (torch.Tensor): An optional input tensor with a shape of: (B, C, W, H).\n", - "\n", - "Make sure that the tensor being optimized is wrapped in `torch.nn.Parameter` and that it can be called by the `forward()` function.\n", - "\n", - "### Forward function\n", - "\n", - "The `forward()` function has zero input varibles and returns a 4 dimension tensor with a shape of (B, C, W, H):\n", - "\n", - "* The tensor being optimized should be called from where it was saved in the init function. This tensor will then be returned when the forward function is called.\n", - "\n", - "* The dimensions of the output tensor should be named: 'B', 'C', 'H', and 'W'." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "Hm2HLX9VFmAT" - }, - "outputs": [], - "source": [ - "class CustomImage(opt.images.ImageParameterization):\n", - " def __init__(\n", - " self,\n", - " size: Tuple[int, int] = (224, 224),\n", - " channels: int = 3,\n", - " batch: int = 1,\n", - " init: torch.Tensor = None,\n", - " ) -> None:\n", - " super().__init__()\n", - " if init is None:\n", - " assert size is not None\n", - " # Create random input with a shape of: B, C, W, H\n", - " init = torch.randn([batch, channels, size[0], size[1]])\n", - " else:\n", - " assert init.dim() == 4\n", - " self.image = torch.nn.Parameter(init) # Convert input to nn.Parameter()\n", - "\n", - " def forward(self) -> torch.Tensor:\n", - " return self.image.refine_names(\"B\", \"C\", \"H\", \"W\") # rename dimensions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x_AK29oiH9Z3" - }, - "source": [ - "## Running Captum with custom modules\n", - "\n", - "Below is a helper function that will let us quickly and easily experiment with our custom modules from above. Random scaling and random spatial jitter transforms are also included in the helper function to improve output quality." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "uQ9sEz8cG2El" - }, - "outputs": [], - "source": [ - "def visualize(model: torch.nn.Module, target: torch.nn.Module):\n", - " # Define our custom image parameterization, then add it to NaturalImage\n", - " image_param = CustomImage\n", - " image = opt.images.NaturalImage(size=(224, 224), parameterization=image_param, batch=2).to(device)\n", - "\n", - " transforms = torch.nn.Sequential(\n", - " CustomTransform(), # Add our custom transform to the list of transforms\n", - "\n", - " # Additional transforms to improve output quality\n", - " opt.transforms.RandomSpatialJitter(16),\n", - " opt.transforms.RandomScale(scale=(1, 0.975, 1.025, 0.95, 1.05)),\n", - " )\n", - "\n", - " # Define our custom loss function as the loss function\n", - " loss_fn = CustomLoss(target, batch_index=0) # Only optimize 0th image to demonstrate batch_index\n", - "\n", - " obj = opt.InputOptimization(model, loss_fn, image, transforms)\n", - " history = obj.optimize(\n", - " stop_criteria=opt.optimization.n_steps(512),\n", - " loss_summarize_fn=custom_loss_summarize, # Our custom loss_summarize_fn\n", - " )\n", - " image().show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And there you have it! Notice that only the left image (at index 0) is optimized since we specified `batch_index=0` when defining `loss_fn`. The right image is unchanged from its random initialization." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 298, - "referenced_widgets": [ - "5c666868d62e4862a648cd0df15155ec", - "389469a07da6435eb2a1be7ea55f4f86", - "36b86b673b544cc5bdb5652eb31cabc9", - "6d93392ab27048068aa8bb1d7ef01cf1", - "2c759e9a43754fc4963a9631cc7702c5", - "8fa32da11a2a4401a57a50f80af7be32", - "ba6b8e0c07074921a5faa7dbc29f3fe3", - "ea6b900b717c4e8f8051094882aeef1f" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "2ylZPub2JTMH" + }, + "source": [ + "# Creating Custom Captum.optim Modules\n", + "Captum's Optim library contains an extensive list of optimization objectives, transforms, and input parameterizations. However, some cases may require adding new features to these areas of Captum's Optim library. Luckily adding them to Captum is easy!" + ] }, - "id": "3m5iQ2zfqV5F", - "outputId": "40b79b81-363c-49c6-8546-9b8ada61665a" - }, - "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3ee58c51e28e4977b0c45befa0511b4c", - "version_major": 2, - "version_minor": 0 + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GWrStkUVEbOC" }, - "text/plain": [ - " 0%| | 0/512 [00:00" + "cell_type": "markdown", + "metadata": { + "id": "DffA7pFSFZY0" + }, + "source": [ + "## Custom Image Transforms\n", + "\n", + "If both Captum and Torchvision lack the transforms that you require, then you can create your own custom transforms.\n", + "\n", + "Custom image transform classes must contain a `forward()` function. The first transform in a list of transforms takes an input tensor with a shape of (B, C, W, H), and the final transform in a list of transforms will need to output a tensor with the same shape of (B, C, W, H). 
Captum and Torchvision's transforms normally expect and output a shape of (B, C, W, H).\n", + "\n", + "An optional `__init__()` function can be used as well.\n", + "\n", + "\n", + "Note that all custom transforms need to be autograd compatible, so that the gradient is not interrupted during the optimization process.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hoyneR7FFTXK" + }, + "outputs": [], + "source": [ + "class CustomTransform(torch.nn.Module):\n", + " def __init__(self, val: int = 1) -> None:\n", + " super().__init__()\n", + " self.val = val\n", + "\n", + " def forward(self, input: torch.Tensor) -> torch.Tensor:\n", + " return input * self.val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2kjc9istEzVz" + }, + "source": [ + "## Custom Loss Objectives\n", + "Captum's loss objectives are composed of classes that the optimization function uses. Custom loss classes should inherit from the base loss class `opt.loss.BaseLoss` and also have the `opt.loss.loss_wrapper` decorator.\n", + "\n", + "For now, the `opt.loss.loss_wrapper` decorator primarily serves to update the name and string representations of the loss objective, but future work may also add other generic loss attributes via the decorator. This decorator is required for custom loss objectives.\n", + "\n", + "Custom loss objectives must contain the following two functions:\n", + "\n", + "**The init function**\n", + "\n", + "* The `__init__()` function must at least contain a `target` variable. The `target` variable should be an `nn.Module` or a list of `nn.Module` instances to collect activations from. Other variables can be added after the `target`.\n", + "\n", + "* An optional variable is `batch_index`, which is either an `int` or a list of `int`. 
The `batch_index` is used to target a specific image in a batch of input images.\n", + "\n", + "* The init function should call the `BaseLoss` `__init__` function and provide it with the target `nn.Module` or list of `nn.Module` along with the `batch_index`.\n", + "\n", + "**The call function**\n", + "\n", + "* The `__call__()` function takes activations from the target layer and then returns a loss value. Activations sent to the call function are extracted from a dictionary with the target as the key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LQZECwPoEdET" + }, + "outputs": [], + "source": [ + "@opt.loss.loss_wrapper\n", + "class CustomLoss(opt.loss.BaseLoss):\n", + " def __init__(\n", + " self,\n", + " target: Union[torch.nn.Module, List[torch.nn.Module]],\n", + " batch_index: Optional[Union[int, List[int]]] = None, # Optional parameter\n", + " ) -> None:\n", + " opt.loss.BaseLoss.__init__(self, target, batch_index)\n", + "\n", + " def __call__(\n", + " self, target_activations: Dict[torch.nn.Module, Optional[torch.Tensor]]\n", + " ) -> torch.Tensor:\n", + "\n", + " # Get activations for target from input dict\n", + " activations = target_activations[self.target]\n", + "\n", + " # self.batch_index is a tuple of (batch_index, batch_index+1)\n", + " activations = activations[self.batch_index[0] : self.batch_index[1]]\n", + "\n", + " # Return activations for loss summarization\n", + " return activations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JmrUOtGbZW5J" + }, + "source": [ + "## Custom Loss Summarize Functions\n", + "\n", + "In addition to the loss objectives, there is also the loss summarization function that can be supplied to the `optimize` method of `InputOptimization`. 
This function dictates how the final loss is computed and aggregated before we call the `backward` method on it to compute gradients.\n", + "\n", + "Here we show the default summarize function to give an idea of what this function does. The default summarize function simply computes the mean of the loss tensor and multiplies it by -1 so that the optimization maximizes the activations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zhxtI_LjZW5K" + }, + "outputs": [], + "source": [ + "def custom_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor:\n", + " return -1 * loss_value.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K45Xg0HGF3VH" + }, + "source": [ + "## Custom Image Parameterization\n", + "\n", + "\n", + "The image parameters that Captum's Optim library optimizes to produce visualizations are stored in a custom image parameterization class. \n", + "\n", + "Custom parameterization must contain the following two functions:\n", + "\n", + "### Init function\n", + "\n", + "The `__init__()` function has 4 input variables:\n", + "\n", + "* size (tuple, int): dimensions in the form height, width. \n", + "\n", + "* channels (int): the number of channels for the output tensor.\n", + "\n", + "* batch (int): the desired batch size to use.\n", + "\n", + "* init (torch.Tensor): An optional input tensor with a shape of: (B, C, H, W).\n", + "\n", + "Make sure that the tensor being optimized is wrapped in `torch.nn.Parameter` and that it can be called by the `forward()` function.\n", + "\n", + "Note that the `__init__()` function can contain any number of variable inputs if the image parameterization is passed as an instance to `NaturalImage`. 
Otherwise, the `__init__()` function must accept the input variables listed above.\n", + "\n", + "### Forward function\n", + "\n", + "The `forward()` function has zero input variables and returns a 4-dimensional tensor with a shape of (B, C, H, W):\n", + "\n", + "* The tensor being optimized should be called from where it was saved in the init function. This tensor will then be returned when the forward function is called.\n", + "\n", + "* The dimensions of the output tensor should be named: 'B', 'C', 'H', and 'W', unless you are using TorchScript / JIT.\n", + "\n", + "* As JIT does not yet support named dimensions, you can use [`torch.jit.is_scripting`](https://pytorch.org/docs/stable/jit_language_reference.html?highlight=is_scripting#torch.jit.is_scripting) to only name the dimensions when not using JIT." ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "visualize(model, model.mixed4a)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "CustomModules_OptimViz.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "2c759e9a43754fc4963a9631cc7702c5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "initial" - } }, - 
"36b86b673b544cc5bdb5652eb31cabc9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "100%", - "description_tooltip": null, - "layout": "IPY_MODEL_8fa32da11a2a4401a57a50f80af7be32", - "max": 128, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2c759e9a43754fc4963a9631cc7702c5", - "value": 128 - } + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Hm2HLX9VFmAT" + }, + "outputs": [], + "source": [ + "class CustomImage(opt.images.ImageParameterization):\n", + " def __init__(\n", + " self,\n", + " size: Tuple[int, int] = (224, 224),\n", + " channels: int = 3,\n", + " batch: int = 1,\n", + " init: torch.Tensor = None,\n", + " ) -> None:\n", + " super().__init__()\n", + " if init is None:\n", + " assert size is not None\n", + " # Create random input with a shape of: B, C, W, H\n", + " init = torch.randn([batch, channels, size[0], size[1]])\n", + " else:\n", + " assert init.dim() == 4\n", + " self.image = torch.nn.Parameter(init) # Convert input to nn.Parameter()\n", + "\n", + " def forward(self) -> torch.Tensor:\n", + " if torch.jit.is_scripting():\n", + " return self.image\n", + " return self.image.refine_names(\"B\", \"C\", \"H\", \"W\") # rename dimensions" + ] }, - "389469a07da6435eb2a1be7ea55f4f86": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - 
"align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "cell_type": "markdown", + "source": [ + "## Custom StopCriteria\n", + "\n", + "StopCriteria functions tell the `InputOptimization.optimize` function when to stop optimizing the input param. We provide 4 possible sources of information after each step for the stop criteria function to determine when to stop the optimization process.\n", + "\n", + "The default Captum `opt.optimization.n_steps` function returns a stop criteria function called `continue_while`. The `continue_while` function takes 4 input variables every step during the optimization process:\n", + "\n", + "* `step` (int): The current optimization step.\n", + "\n", + "* `obj`: The current instance of InputOptimization being used.\n", + "\n", + "* `history` (list of torch.Tensor): A list of loss values per iteration. The size of the list is equal to the number of steps that have already been performed. The last value in the list corresponds to the current step.\n", + "\n", + "* `optim` (torch.optim.Optimizer): The current instance of the optimizer being used.\n", + "\n", + "All stop criteria functions or classes using `__call__` functions, should accept the same 4 inputs as `continue_while`. 
They are also expected to return a boolean value for each step to indicate whether optimization should continue.\n", + "\n", + "Note that these requirements may not exist for custom optimization functions, which can utilize their own custom stopping criteria.\n" + ], + "metadata": { + "id": "FfbTtiC5g83U" + } }, - "5c666868d62e4862a648cd0df15155ec": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_36b86b673b544cc5bdb5652eb31cabc9", - "IPY_MODEL_6d93392ab27048068aa8bb1d7ef01cf1" + { + "cell_type": "code", + "source": [ + "from tqdm.auto import tqdm\n", + "\n", + "\n", + "# Main setup function\n", + "def n_steps_custom(n: int, show_progress: bool = True):\n", + "\n", + " # Setup progress bar so that we can monitor progress\n", + " if show_progress:\n", + " pbar = tqdm(total=n, unit=\" step\")\n", + "\n", + " # The stop Criteria function\n", + " def continue_while(\n", + " step: int,\n", + " obj: opt.InputOptimization,\n", + " history: Iterable[torch.Tensor],\n", + " optim: torch.optim.Optimizer,\n", + " ) -> bool:\n", + " if len(history) > 0:\n", + " if show_progress:\n", + " # Print current optimization step and loss value\n", + " pbar.set_postfix(\n", + " {\"Objective\": f\"{history[-1].mean():.1f}\"}, refresh=False\n", + " )\n", + "\n", + " # Return True if we haven't reached the target num of optimization steps\n", + " if step < n:\n", + " if show_progress:\n", + " pbar.update()\n", + " return True\n", + "\n", + " # Return False if we have reached the target num of optimization steps\n", + " else:\n", + " if show_progress:\n", + " pbar.close()\n", + " return False\n", + "\n", + " # Return StopCriteria function 
to use for optimization\n", + " return continue_while" ], - "layout": "IPY_MODEL_389469a07da6435eb2a1be7ea55f4f86" - } + "metadata": { + "id": "_AFuQcdqg8Xx" + }, + "execution_count": null, + "outputs": [] }, - "6d93392ab27048068aa8bb1d7ef01cf1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ea6b900b717c4e8f8051094882aeef1f", - "placeholder": "​", - "style": "IPY_MODEL_ba6b8e0c07074921a5faa7dbc29f3fe3", - "value": " 128/128 [00:42<00:00, 2.99 step/s, Objective=356.1]" - } + { + "cell_type": "markdown", + "source": [ + "\n", + "## Custom Optimization Functions\n", + "\n", + "While the default `optimize` function from `InputOptimization` usually suffices for most use cases, you may find yourself needing something different. For example if you want to use a [learning rate scheduler](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate), or if you wish to use an optimizer like LBFGS which requires a `closure` function [passed to their step function](https://pytorch.org/docs/stable/optim.html#taking-an-optimization-step).\n", + "\n", + "To create a custom optimization function, we will recreate the default `optimize` function while replacing `self` with the `InputOptimization` instance. 
We can then simply pass our `InputOptimization` instance to the function in order to render our results.\n", + "\n", + "Important `InputOptimization` Functions & Attributes:\n", + "\n", + "* The `.parameters()` function returns the list of input parameters requiring grad.\n", + "* The `.loss()` function returns the loss function values.\n", + "* The `.cleanup()` function removes the hooks that were used to collect activations.\n", + "* The model being used can be accessed via `.model` attribute.\n", + "* The transforms being used can be accessed via `.transforms` attribute." + ], + "metadata": { + "id": "uh1HqWb9ajpa" + } }, - "8fa32da11a2a4401a57a50f80af7be32": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "cell_type": "code", + "source": [ + "def custom_optimize(\n", + " obj: opt.InputOptimization,\n", + " stop_criteria: Optional[Callable] = None,\n", + " 
optimizer: Optional[torch.optim.Optimizer] = None,\n", + " loss_summarize_fn: Optional[Callable] = None,\n", + " lr: float = 0.025,\n", + ") -> torch.Tensor:\n", + "\n", + " # Setup conditions for when to stop optimizing\n", + " stop_criteria = stop_criteria or opt.optimization.n_steps(512)\n", + "\n", + " # Pass the parameters of our optimization task to the optimizer\n", + " optimizer = optimizer or torch.optim.Adam(obj.parameters(), lr=lr)\n", + " assert isinstance(optimizer, torch.optim.Optimizer)\n", + "\n", + " # Set the loss summarization function\n", + " loss_summarize_fn = loss_summarize_fn or opt.loss.default_loss_summarize\n", + "\n", + " history: List[torch.Tensor] = []\n", + " step: int = 0\n", + "\n", + " # Run optimization loop with protection\n", + " try:\n", + "\n", + " # Stop criteria requires 4 variables from the optimization process\n", + " while stop_criteria(step, obj, history, optimizer):\n", + " optimizer.zero_grad()\n", + "\n", + " # Summarize any non scalar loss values\n", + " loss_value = loss_summarize_fn(obj.loss())\n", + "\n", + " # Place loss values from the current step into history list\n", + " history.append(loss_value.clone().detach())\n", + "\n", + " loss_value.backward()\n", + " optimizer.step()\n", + " # scheduler.step() # LR Scheduler step location\n", + " step += 1\n", + "\n", + " # Always run final clean up\n", + " finally:\n", + " obj.cleanup()\n", + "\n", + " # Return optimization loss history for all optimization steps\n", + " return torch.stack(history)" + ], + "metadata": { + "id": "VVfP7PTHafox" + }, + "execution_count": null, + "outputs": [] }, - "ba6b8e0c07074921a5faa7dbc29f3fe3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "StyleView", - "description_width": "" - } + { + "cell_type": "markdown", + "metadata": { + "id": "x_AK29oiH9Z3" + }, + "source": [ + "## Running Captum with custom modules\n", + "\n", + "Below is a helper function that will let us quickly and easily experiment with our custom modules from above. Random scaling and random spatial jitter transforms are also included in the helper function to improve output quality." + ] }, - "ea6b900b717c4e8f8051094882aeef1f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uQ9sEz8cG2El" + }, + "outputs": [], + "source": [ + "def visualize(model: torch.nn.Module, target: torch.nn.Module):\n", + " # Define our custom image parameterization, then add it to NaturalImage\n", + " image_param = CustomImage\n", + " image = opt.images.NaturalImage(\n", 
+ " size=(224, 224), parameterization=image_param, batch=2\n", + " ).to(device)\n", + "\n", + " transforms = torch.nn.Sequential(\n", + " CustomTransform(), # Add our custom transform to the list of transforms\n", + " # Additional transforms to improve output quality\n", + " opt.transforms.RandomSpatialJitter(16),\n", + " opt.transforms.RandomScale(scale=(1, 0.975, 1.025, 0.95, 1.05)),\n", + " )\n", + "\n", + " # Define our custom loss function as the loss function\n", + " loss_fn = CustomLoss(\n", + " target, batch_index=0 # Only optimize 0th image to demonstrate batch_index\n", + " )\n", + "\n", + " obj = opt.InputOptimization(model, loss_fn, image, transforms)\n", + " history = custom_optimize( # Our custom optimization function\n", + " obj=obj,\n", + " stop_criteria=n_steps_custom(512), # Our custom stop criteria\n", + " loss_summarize_fn=custom_loss_summarize, # Our custom loss_summarize_fn\n", + " )\n", + " image().show(figsize=(10, 5), images_per_row=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oi5-40h_ZW5O" + }, + "source": [ + "And there you have it! Notice that only the left image (at index 0) is optimized since we specified `batch_index=0` when defining `loss_fn`. The right image is unchanged from its random initialization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335, + "referenced_widgets": [ + "42c156add91d4acaadcdefa7d261363e", + "b6d1bc1fa28140e2839110ea31c62cc3", + "988add1d46364a21be7e3cdd25bfeea6", + "3a0e2b4a4437470ca73d21b47b2e50bf", + "40d83f16100d4d52abdae1bfd57b3737", + "63a94da5642d4e638d34090f1c039ab1", + "be7c4264ae594792a8d5e325ffcd73f9", + "fdf5702bc6a0416284af79696f1bb7f8", + "1c85d25bb99440a0aab08a49200203f5", + "3b7848513468421aac1d1e8547223825", + "5bb9a2c83c5a4dc8ad1acc44ca79d7e8" + ] + }, + "id": "3m5iQ2zfqV5F", + "outputId": "a4e73b97-8181-4a1c-97da-124c74ff4195" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/512 [00:00" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "visualize(model, model.mixed4a)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Other Custom Modules" + ], + "metadata": { + "id": "T2AJzaGTZseI" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Custom NaturalImage Modules\n", + "\n", + "The requirements for creating your own variation of `NaturalImage` are extremely simple. The `forward` function should wrap the output in an `ImageTensor` instance. For JIT support, you can wrap the output in an `ImageTensor` instance inside a separate function that's wrapped with `@torch.jit.ignore`." 
+ ], + "metadata": { + "id": "FIsFUiGPZdRm" + } + }, + { + "cell_type": "code", + "source": [ + "class CustomNaturalImage(opt.images.ImageParameterization):\n", + " def __init__(self, parameterization: opt.images.ImageParameterization) -> None:\n", + " \"\"\"\n", + " Args:\n", + "\n", + " parameterization (ImageParameterization): The image parameterization\n", + " instance you wish to use.\n", + " \"\"\"\n", + " super().__init__()\n", + " self.parameterization = parameterization\n", + "\n", + " @torch.jit.ignore\n", + " def to_image_tensor(self, x: torch.Tensor) -> torch.Tensor:\n", + " return opt.images.ImageTensor(x)\n", + "\n", + " def forward(self) -> torch.Tensor:\n", + " \"\"\"\n", + " Collect the current parameterized tensor and wrap it in ImageTensor.\n", + "\n", + " Returns\n", + " image(torch.Tensor): A PyTorch tensor.\n", + " \"\"\"\n", + " image = self.parameterization()\n", + " return self.to_image_tensor(image) # Wrap output in opt.images.ImageTensor" + ], + "metadata": { + "id": "xAKSiqg1ZccC" + }, + "execution_count": null, + "outputs": [] } - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "CustomModules_OptimViz.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "42c156add91d4acaadcdefa7d261363e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b6d1bc1fa28140e2839110ea31c62cc3", + "IPY_MODEL_988add1d46364a21be7e3cdd25bfeea6", + "IPY_MODEL_3a0e2b4a4437470ca73d21b47b2e50bf" + ], + "layout": "IPY_MODEL_40d83f16100d4d52abdae1bfd57b3737" + } + }, + "b6d1bc1fa28140e2839110ea31c62cc3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_63a94da5642d4e638d34090f1c039ab1", + "placeholder": "​", + "style": "IPY_MODEL_be7c4264ae594792a8d5e325ffcd73f9", + "value": "100%" + } + }, + "988add1d46364a21be7e3cdd25bfeea6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fdf5702bc6a0416284af79696f1bb7f8", + "max": 512, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1c85d25bb99440a0aab08a49200203f5", + "value": 512 + } + }, + "3a0e2b4a4437470ca73d21b47b2e50bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3b7848513468421aac1d1e8547223825", + "placeholder": "​", + "style": "IPY_MODEL_5bb9a2c83c5a4dc8ad1acc44ca79d7e8", + "value": " 512/512 [00:12<00:00, 41.83 step/s, Objective=-32.6]" + } + }, + "40d83f16100d4d52abdae1bfd57b3737": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "63a94da5642d4e638d34090f1c039ab1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "be7c4264ae594792a8d5e325ffcd73f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fdf5702bc6a0416284af79696f1bb7f8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c85d25bb99440a0aab08a49200203f5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3b7848513468421aac1d1e8547223825": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5bb9a2c83c5a4dc8ad1acc44ca79d7e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From 264a8ad563993c0d73dc772ba0f3d763882485d9 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 2 Jun 2022 10:33:25 -0600 Subject: [PATCH 030/174] Support non default input sizes in FacetLoss --- captum/optim/_core/loss.py | 14 +++++++++++++- tests/optim/core/test_loss.py | 28 +++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 6542e828b7..04457aaa30 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1037,7 +1037,19 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: else: strength_t = self.strength flat_attr = strength_t * flat_attr - return torch.sum(flat_attr * self.facet_weights) + + if ( + self.facet_weights.dim() == 4 + and layer.dim() == 4 + and self.facet_weights.shape[2:] != layer.shape[2:] + ): + facet_weights = torch.nn.functional.interpolate( + self.facet_weights, size=layer.shape[2:] + ) + else: + facet_weights = self.facet_weights + + return 
torch.sum(flat_attr * facet_weights) def sum_loss_list( diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 6ae0105f55..ee8e34a033 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -289,7 +289,7 @@ def test_facetloss_single_channel(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -308,7 +308,7 @@ def test_facetloss_multi_channel(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([1, 1, 1]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 2.0 + facet_weights = torch.ones([1, 2, 6, 6]) * 2.0 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -325,7 +325,7 @@ def test_facetloss_strength(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 strength = 0.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], @@ -345,7 +345,7 @@ def test_facetloss_strength_batch(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 6, 6]) * 1.5 strength = [0.1, 5.05] loss = opt_loss.FacetLoss( ultimate_target=model[1], @@ -385,7 +385,7 @@ def test_facetloss_batch_index(self) -> None: model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) vec = torch.tensor([0, 1, 0]).float() - facet_weights = torch.ones([1, 2, 1, 1]) * 1.5 + facet_weights = torch.ones([1, 2, 5, 5]) * 1.5 loss = opt_loss.FacetLoss( ultimate_target=model[1], layer_target=model[0].layer, @@ -397,6 +397,24 @@ def 
test_facetloss_batch_index(self) -> None: output = get_loss_value(model, loss, model_input) self.assertAlmostEqual(output.item(), 10.38000202178955, places=5) + def test_facetloss_resize_4d(self) -> None: + layer = torch.nn.Conv2d(2, 3, 1, bias=True) + layer.weight.data.fill_(0.1) # type: ignore + layer.bias.data.fill_(1) # type: ignore + + model = torch.nn.Sequential(BasicModel_ConvNet_Optim(), layer) + + vec = torch.tensor([1, 1, 1]).float() + facet_weights = torch.ones([1, 2, 12, 12]) * 2.0 + loss = opt_loss.FacetLoss( + ultimate_target=model[1], + layer_target=model[0].layer, + vec=vec, + facet_weights=facet_weights, + ) + output = get_loss_value(model, loss, input_shape=[1, 3, 6, 6]) + self.assertAlmostEqual(output, 1.560000, places=6) + class TestCompositeLoss(BaseTest): def test_negative(self) -> None: From 00927a13ee8c3aa267a95a9f3dc1662c17e47d6f Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 2 Jun 2022 14:10:06 -0600 Subject: [PATCH 031/174] Improve loss docs * Add missing docs. * Fix errors in existing docs. --- captum/optim/_core/loss.py | 153 ++++++++++++++++++++++++++++++++----- 1 file changed, 134 insertions(+), 19 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 194422f3f6..57b63ebc1c 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -10,6 +10,14 @@ def _make_arg_str(arg: Any) -> str: + """ + Args: + + args (Any): A set of arguments to covert to a string. + + Returns: + args (str): The args in str form. + """ arg = str(arg) too_big = len(arg) > 15 or "\n" in arg return arg[:15] + "..." if too_big else arg @@ -23,7 +31,7 @@ class Loss(ABC): """ def __init__(self) -> None: - super(Loss, self).__init__() + super().__init__() @abstractproperty def target(self) -> Union[nn.Module, List[nn.Module]]: @@ -105,10 +113,35 @@ def module_op( ) -> "CompositeLoss": """ This is a general function for applying math operations to Losses + + Args: + + self (Loss): A Loss objective instance. 
+ other (int, float, Loss, or None): The Loss objective instance or number to + use on the self Loss objective as part of a math operation. If math_op + is a unary operation, then other should be set to None. + math_op (Callable): A math operator to use on the Loss instance. + + Returns: + loss (CompositeLoss): A CompositeLoss instance with the math operations + created by the specified arguments. """ if other is None and math_op == operator.neg: def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through loss objective, and then apply a unary + math op. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objective, and the unary math_op. + """ return math_op(self(module)) name = self.__name__ @@ -116,6 +149,19 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: elif isinstance(other, (int, float)): def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss objective and then apply the + math operations with numbers. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objective, and then the math_op with a number. + """ return math_op(self(module), other) name = self.__name__ @@ -123,6 +169,19 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: elif isinstance(other, Loss): # We take the mean of the output tensor to resolve shape mismatches def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss objectives and then combine the + outputs with a math operation. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. 
+ + Returns: + loss (torch.Tensor): The target activations after being run + through the loss objectives, and then merged with the math_op. + """ return math_op(torch.mean(self(module)), torch.mean(other(module))) name = f"Compose({', '.join([self.__name__, other.__name__])})" @@ -143,7 +202,18 @@ def __init__( target: Union[nn.Module, List[nn.Module]] = [], batch_index: Optional[Union[int, List[int]]] = None, ) -> None: - super(BaseLoss, self).__init__() + """ + Args: + + target (nn.Module or list of nn.module): A target nn.Module or list of + nn.Module. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set to + None, defaults to all activations in the batch. Index ranges should be + in the format of: [start, end]. + Default: None + """ + super().__init__() self._target = target if batch_index is None: self._batch_index = (None, None) @@ -156,10 +226,20 @@ def __init__( @property def target(self) -> Union[nn.Module, List[nn.Module]]: + """ + Returns: + target (nn.Module or list of nn.Module): A target nn.Module or list of + nn.Module. + """ return self._target @property def batch_index(self) -> Tuple: + """ + Returns: + batch_index (tuple of int): A tuple of batch indices with a format + of: (start, end). + """ return self._batch_index @@ -170,11 +250,35 @@ def __init__( name: str = "", target: Union[nn.Module, List[nn.Module]] = [], ) -> None: - super(CompositeLoss, self).__init__(target) + """ + Args: + + loss_fn (Callable): A function that takes a dict of captured activations + with nn.Modules as keys, and then passes those activations through loss + objective(s) & math operations. + name (str, optional): The name of all composable operations in the + instance. + Default: "" + target (nn.Module or list of nn.module): A target nn.Module or list of + nn.Module. 
+ """ + super().__init__(target) self.__name__ = name self.loss_fn = loss_fn def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the loss function. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run through the + loss function. + """ return self.loss_fn(targets_to_values) @@ -206,7 +310,7 @@ class LayerActivation(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -239,7 +343,7 @@ def __init__( channel_index (int): The index of the channel to optimize for. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -292,7 +396,7 @@ def __init__( Default: None batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -333,7 +437,7 @@ class DeepDream(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. 
index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -360,7 +464,7 @@ class TotalVariation(BaseLoss): instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -393,7 +497,7 @@ def __init__( constant (float): Constant threshold to deduct from the activations. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -430,7 +534,7 @@ def __init__( Default: 1e-6 batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. index ranges should be + None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -461,7 +565,7 @@ class Diversity(BaseLoss): target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. index + optimize. If set to None, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: None """ @@ -579,7 +683,7 @@ def __init__( Default: 2.0 batch_index (list of int, optional): The index range of activations to optimize. 
If set to None, defaults to all activations in the batch. - index ranges should be in the format of: [start, end]. + Index ranges should be in the format of: [start, end]. Default: None """ if batch_index: @@ -730,7 +834,7 @@ class AngledNeuronDirection(BaseLoss): More information on the algorithm this objective uses can be found here: https://github.com/tensorflow/lucid/issues/116 - This Lucid equivalents of this loss function can be found here: + This Lucid equivalents of this loss objective can be found here: https://github.com/tensorflow/lucid/blob/master/notebooks/ activation-atlas/activation-atlas-simple.ipynb https://github.com/tensorflow/lucid/blob/master/notebooks/ @@ -775,6 +879,10 @@ def __init__( eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. Default: 1.0e-4 + batch_index (int, optional): The index of activations to optimize if + optimizing a batch of activations. If set to None, defaults to all + activations in the batch. + Default: None """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -948,22 +1056,22 @@ def sum_loss_list( ) -> CompositeLoss: """ Summarize a large number of losses without recursion errors. By default using 300+ - loss functions for a single optimization task will result in exceeding Python's + loss objectives for a single optimization task will result in exceeding Python's default maximum recursion depth limit. This function can be used to avoid the - recursion depth limit for tasks such as summarizing a large list of loss functions + recursion depth limit for tasks such as summarizing a large list of loss objectives with the built-in sum() function. This function works similar to Lucid's optvis.objectives.Objective.sum() function. Args: - loss_list (list): A list of loss function objectives. 
- to_scalar_fn (Callable): A function for converting loss function outputs to + loss_list (list): A list of loss objectives. + to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. Default: torch.mean Returns: - loss_fn (CompositeLoss): A composite loss function containing all the loss + loss_fn (CompositeLoss): A CompositeLoss instance containing all the loss functions from `loss_list`. """ @@ -985,11 +1093,18 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: """ - Helper function to summarize tensor outputs from loss functions. + Helper function to summarize tensor outputs from loss objectives. default_loss_summarize applies `mean` to the loss tensor and negates it so that optimizing it maximizes the activations we are interested in. + + Args: + + loss_value (torch.Tensor): A tensor containing the loss values. + + Returns: + loss_value (torch.Tensor): The loss_value's mean multiplied by -1. 
""" return -1 * loss_value.mean() From 16f2177fe5f5df08800e6bc96291bb5a3a053b27 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 3 Jun 2022 14:49:14 -0600 Subject: [PATCH 032/174] Add additional tests --- tests/optim/core/test_loss.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 97f4c78ed1..0ba3651175 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -220,7 +220,7 @@ def test_l2_batch_index(self) -> None: model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() output = get_loss_value(model, loss, model_input) self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) - self.assertEqual(output.item(), 987.9017944335938) + self.assertAlmostEqual(output.item(), 987.9017944335938, places=3) class TestDiversity(BaseTest): @@ -736,6 +736,23 @@ def test_sum_loss_list_compose_add(self) -> None: out = get_loss_value(model, loss_fn, [n_batch, 3, 1, 1]) self.assertEqual(out, float(n_batch + 1.0)) + def test_sum_loss_list_sum(self) -> None: + n_batch = 100 + model = torch.nn.Identity() + loss_fn_list = [opt_loss.LayerActivation(model) for i in range(n_batch)] + loss_fn = opt_loss.sum_loss_list(loss_fn_list, torch.sum) + out = get_loss_value(model, loss_fn, [n_batch, 3, 1, 1]) + self.assertEqual(out.item(), 30000.0) + + def test_sum_loss_list_identity(self) -> None: + n_batch = 100 + model = torch.nn.Identity() + loss_fn_list = [opt_loss.LayerActivation(model) for i in range(n_batch)] + loss_fn = opt_loss.sum_loss_list(loss_fn_list, torch.nn.Identity()) + out = get_loss_value(model, loss_fn, [n_batch, 3, 1, 1]) + self.assertEqual(list(out.shape), [n_batch, 3, 1, 1]) + self.assertEqual(out.sum().item(), 30000.0) + class TestModuleOP(BaseTest): def test_module_op_loss_unary_op(self) -> None: From 63843b59edc96c96fa0c844b2bd200c16a559c4c Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 5 Jun 2022 09:23:30 -0600 
Subject: [PATCH 033/174] Add packaging library to setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 48bc6f4057..09fe441950 100755 --- a/setup.py +++ b/setup.py @@ -147,7 +147,7 @@ def get_package_files(root, subdirs): long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.6", - install_requires=["matplotlib", "numpy", "torch>=1.6"], + install_requires=["matplotlib", "numpy", "packaging", "torch>=1.6"], packages=find_packages(exclude=("tests", "tests.*")), extras_require={ "dev": DEV_REQUIRES, From 9305b109417ca24ee6893075e03f3da241e59252 Mon Sep 17 00:00:00 2001 From: Daniel Krakowczykgit Date: Thu, 9 Jun 2022 16:59:48 -0700 Subject: [PATCH 034/174] Add support for captum toplevel-import. (#912) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This fixes issue https://github.com/pytorch/captum/issues/680 Strange import issue --> AttributeError: module 'captum' has no attribute 'attr' In most python packages, you can import the toplevel package, like numpy, scipy, torch, etc.. and then access the submodules simply by the dot-operator. Like you can use `import numpy` and after that you can use any submodules à la `numpy.random.uniform`. With this PR, you can just `import captum` and then for example use `captum.attr.DeepLift` or `captum.robust.Perturbation` instead of having to import both. It's just a small convenience, and I think there are more people that expect this kind of import to work but don't bother to create an issue out of this. I hope this PR is considered as helpful. 
Pull Request resolved: https://github.com/pytorch/captum/pull/912 Reviewed By: NarineK Differential Revision: D37053826 Pulled By: vivekmig fbshipit-source-id: 64fa2be7651ca30571d1eb85b45dd11410676c4b --- captum/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/captum/__init__.py b/captum/__init__.py index 24b3fae727..fda440d4f5 100644 --- a/captum/__init__.py +++ b/captum/__init__.py @@ -1,3 +1,11 @@ #!/usr/bin/env python3 +import captum.attr as attr # noqa +import captum.concept as concept # noqa +import captum.influence as influence # noqa +import captum.insights as insights # noqa +import captum.log as log # noqa +import captum.metrics as metrics # noqa +import captum.robust as robust # noqa + __version__ = "0.5.0" From 1489f3fd3ab4011626fa23850bf364802426c893 Mon Sep 17 00:00:00 2001 From: Zhiyuan Chen Date: Fri, 10 Jun 2022 08:24:25 -0700 Subject: [PATCH 035/174] Fix comment in saliency (#970) Summary: Pull Request resolved: https://github.com/pytorch/captum/pull/970 Reviewed By: aobo-y Differential Revision: D37010644 Pulled By: vivekmig fbshipit-source-id: 8f90f0d428e48bd156ea743e6983b825194284f4 --- captum/attr/_core/saliency.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/attr/_core/saliency.py b/captum/attr/_core/saliency.py index 7e2aeed5cd..3790bd2068 100644 --- a/captum/attr/_core/saliency.py +++ b/captum/attr/_core/saliency.py @@ -43,9 +43,9 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which integrated - gradients are computed. If forward_func takes a single - tensor as input, a single input tensor should be provided. + inputs (tensor or tuple of tensors): Input for which saliency + is computed. If forward_func takes a single tensor + as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple of the input tensors should be provided. 
It is assumed that for all given input tensors, dimension 0 corresponds From 4aa7e48a98ec607cd6f257e11ff9eb60b5dee985 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 12 Jun 2022 14:06:46 -0700 Subject: [PATCH 036/174] Fix `set_all_random_seeds` testing function (#974) Summary: A seed value was being passed to the function, but it was ignored. This PR fixes that. Pull Request resolved: https://github.com/pytorch/captum/pull/974 Reviewed By: NarineK Differential Revision: D37093181 Pulled By: vivekmig fbshipit-source-id: 87daeb3b7b242f42c66f20836a9702a9226819ca --- tests/helpers/basic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/helpers/basic.py b/tests/helpers/basic.py index 8f5fb0ae9f..7ac9a8b4cd 100644 --- a/tests/helpers/basic.py +++ b/tests/helpers/basic.py @@ -81,11 +81,11 @@ def assert_delta(test, delta): ) -def set_all_random_seeds(seed): - random.seed(1234) - np.random.seed(1234) - torch.manual_seed(1234) - torch.cuda.manual_seed_all(1234) +def set_all_random_seeds(seed: int = 1234) -> None: + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) torch.backends.cudnn.deterministic = True @@ -96,6 +96,6 @@ class BaseTest(unittest.TestCase): initializations are random, this ensures that tests run deterministically. """ - def setUp(self): + def setUp(self) -> None: set_all_random_seeds(1234) patch_methods(self) From 0ece0afeadb923fbaff798e2e3ebb7a1665ccc6d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 13 Jun 2022 17:12:06 -0700 Subject: [PATCH 037/174] Add missing type hints in accordance with PEP-0484 (#976) Summary: Adding `-> None` to `__init__` functions is specified by [PEP-0484](https://www.python.org/dev/peps/pep-0484/) and is really easy to do with regex, so I went ahead and did it for the `captum/` & `tests/` directories. 
I used this to do it: https://stackoverflow.com/questions/64948233/how-to-add-none-to-the-end-of-init-functions-with-notepad If this PR seems a bit familiar, it's because I submitted a similar one 2 years ago: https://github.com/pytorch/captum/pull/535 Pull Request resolved: https://github.com/pytorch/captum/pull/976 Reviewed By: vivekmig Differential Revision: D37094723 Pulled By: NarineK fbshipit-source-id: 10b73a325f838ffb8c1301c6132792ef3730197c --- captum/_utils/av.py | 2 +- captum/_utils/progress.py | 4 +-- captum/_utils/sample_gradient.py | 28 +++++++++---------- captum/concept/_core/tcav.py | 2 +- captum/concept/_utils/classifier.py | 6 ++-- .../influence/_core/similarity_influence.py | 2 +- captum/influence/_utils/common.py | 2 +- captum/influence/_utils/nearest_neighbors.py | 2 +- captum/log/__init__.py | 2 +- .../_core/metrics/min_param_perturbation.py | 2 +- tests/attr/test_hook_removal.py | 2 +- tests/concept/test_concept.py | 2 +- tests/helpers/basic_models.py | 10 +++---- .../_core/test_similarity_influence.py | 4 +-- tests/influence/_utils/common.py | 18 ++++++------ tests/robust/test_attack_comparator.py | 2 +- tests/utils/test_av.py | 2 +- 17 files changed, 46 insertions(+), 46 deletions(-) diff --git a/captum/_utils/av.py b/captum/_utils/av.py index f3b235dd8d..ac3c32a204 100644 --- a/captum/_utils/av.py +++ b/captum/_utils/av.py @@ -47,7 +47,7 @@ def __init__( identifier: Optional[str] = None, layer: Optional[str] = None, num_id: Optional[str] = None, - ): + ) -> None: r""" Loads into memory the list of all activation file paths associated with the input `model_id`. 
diff --git a/captum/_utils/progress.py b/captum/_utils/progress.py index 88cb07e83f..2ece45ad99 100644 --- a/captum/_utils/progress.py +++ b/captum/_utils/progress.py @@ -12,7 +12,7 @@ class DisableErrorIOWrapper(object): - def __init__(self, wrapped: TextIO): + def __init__(self, wrapped: TextIO) -> None: """ The wrapper around a TextIO object to ignore write errors like tqdm https://github.com/tqdm/tqdm/blob/bcce20f771a16cb8e4ac5cc5b2307374a2c0e535/tqdm/utils.py#L131 @@ -48,7 +48,7 @@ def __init__( total: int = None, file: TextIO = None, mininterval: float = 0.5, - ): + ) -> None: """ Simple progress output used when tqdm is unavailable. Same as tqdm, output to stderr channel diff --git a/captum/_utils/sample_gradient.py b/captum/_utils/sample_gradient.py index 694b2c0121..d17e5b9bf1 100644 --- a/captum/_utils/sample_gradient.py +++ b/captum/_utils/sample_gradient.py @@ -1,6 +1,6 @@ from collections import defaultdict from enum import Enum -from typing import cast, Iterable, Tuple, Union +from typing import cast, DefaultDict, Iterable, List, Tuple, Union import torch from captum._utils.common import _format_tensor_into_tuples, _register_backward_hook @@ -8,7 +8,7 @@ from torch.nn import Module -def _reset_sample_grads(module: Module): +def _reset_sample_grads(module: Module) -> None: module.weight.sample_grad = 0 # type: ignore if module.bias is not None: module.bias.sample_grad = 0 # type: ignore @@ -100,19 +100,19 @@ class SampleGradientWrapper: - https://github.com/pytorch/opacus/tree/main/opacus/grad_sample """ - def __init__(self, model): + def __init__(self, model) -> None: self.model = model self.hooks_added = False - self.activation_dict = defaultdict(list) - self.gradient_dict = defaultdict(list) - self.forward_hooks = [] - self.backward_hooks = [] + self.activation_dict: DefaultDict[Module, List[Tensor]] = defaultdict(list) + self.gradient_dict: DefaultDict[Module, List[Tensor]] = defaultdict(list) + self.forward_hooks: 
List[torch.utils.hooks.RemovableHandle] = [] + self.backward_hooks: List[torch.utils.hooks.RemovableHandle] = [] - def add_hooks(self): + def add_hooks(self) -> None: self.hooks_added = True self.model.apply(self._register_module_hooks) - def _register_module_hooks(self, module: torch.nn.Module): + def _register_module_hooks(self, module: torch.nn.Module) -> None: if isinstance(module, tuple(SUPPORTED_MODULES.keys())): self.forward_hooks.append( module.register_forward_hook(self._forward_hook_fn) @@ -126,7 +126,7 @@ def _forward_hook_fn( module: Module, module_input: Union[Tensor, Tuple[Tensor, ...]], module_output: Union[Tensor, Tuple[Tensor, ...]], - ): + ) -> None: inp_tuple = _format_tensor_into_tuples(module_input) self.activation_dict[module].append(inp_tuple[0].clone().detach()) @@ -135,11 +135,11 @@ def _backward_hook_fn( module: Module, grad_input: Union[Tensor, Tuple[Tensor, ...]], grad_output: Union[Tensor, Tuple[Tensor, ...]], - ): + ) -> None: grad_output_tuple = _format_tensor_into_tuples(grad_output) self.gradient_dict[module].append(grad_output_tuple[0].clone().detach()) - def remove_hooks(self): + def remove_hooks(self) -> None: self.hooks_added = False for hook in self.forward_hooks: @@ -151,11 +151,11 @@ def remove_hooks(self): self.forward_hooks = [] self.backward_hooks = [] - def _reset(self): + def _reset(self) -> None: self.activation_dict = defaultdict(list) self.gradient_dict = defaultdict(list) - def compute_param_sample_gradients(self, loss_blob, loss_mode="mean"): + def compute_param_sample_gradients(self, loss_blob, loss_mode="mean") -> None: assert ( loss_mode.upper() in LossMode.__members__ ), f"Provided loss mode {loss_mode} is not valid" diff --git a/captum/concept/_core/tcav.py b/captum/concept/_core/tcav.py index 6d79ba06ae..8b6c996856 100644 --- a/captum/concept/_core/tcav.py +++ b/captum/concept/_core/tcav.py @@ -27,7 +27,7 @@ class LabelledDataset(Dataset): It is used to train a classifier in train_tcav """ - def __init__(self, 
datasets: List[AV.AVDataset], labels: List[int]): + def __init__(self, datasets: List[AV.AVDataset], labels: List[int]) -> None: """ Creates the LabelledDataset given a list of K Datasets, and a length K list of integer labels representing K different concepts. diff --git a/captum/concept/_utils/classifier.py b/captum/concept/_utils/classifier.py index b9b21f809d..5bdf605470 100644 --- a/captum/concept/_utils/classifier.py +++ b/captum/concept/_utils/classifier.py @@ -126,7 +126,7 @@ class DefaultClassifier(Classifier): class and handles large concept datasets accordingly. """ - def __init__(self): + def __init__(self) -> None: warnings.warn( "Using default classifier for TCAV which keeps input" " both train and test datasets in the memory. Consider defining" @@ -178,7 +178,7 @@ def train_and_eval( predict = self.lm(x_test) - predict = self.lm.classes()[torch.argmax(predict, dim=1)] + predict = self.lm.classes()[torch.argmax(predict, dim=1)] # type: ignore score = predict.long() == y_test.long().cpu() accs = score.float().mean() @@ -217,7 +217,7 @@ def classes(self) -> List[int]: classes (list): The list of classes used by the classifier to train the model in the `train_and_eval` method. """ - return self.lm.classes().detach().numpy() + return self.lm.classes().detach().numpy() # type: ignore def _train_test_split( diff --git a/captum/influence/_core/similarity_influence.py b/captum/influence/_core/similarity_influence.py index f781079a48..83cb2966fa 100644 --- a/captum/influence/_core/similarity_influence.py +++ b/captum/influence/_core/similarity_influence.py @@ -77,7 +77,7 @@ def __init__( similarity_direction: str = "max", batch_size: int = 1, **kwargs: Any, - ): + ) -> None: r""" Args: module (torch.nn.Module): An instance of pytorch model. 
This model should diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index 10783eaf4c..b86ddf9f93 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -305,7 +305,7 @@ def _get_k_most_influential_helper( class _DatasetFromList(Dataset): - def __init__(self, _l: List[Any]): + def __init__(self, _l: List[Any]) -> None: self._l = _l def __getitem__(self, i: int) -> Any: diff --git a/captum/influence/_utils/nearest_neighbors.py b/captum/influence/_utils/nearest_neighbors.py index 3c26d1d448..3ecd452de3 100644 --- a/captum/influence/_utils/nearest_neighbors.py +++ b/captum/influence/_utils/nearest_neighbors.py @@ -92,7 +92,7 @@ class AnnoyNearestNeighbors(NearestNeighbors): but arbitrary shape *, and flatten them before storing in the Annoy data structure. """ - def __init__(self, num_trees: int = 10): + def __init__(self, num_trees: int = 10) -> None: """ Args: num_trees (int): The number of trees to use. Increasing this number gives diff --git a/captum/log/__init__.py b/captum/log/__init__.py index 81d61383d0..8c0b7472ad 100644 --- a/captum/log/__init__.py +++ b/captum/log/__init__.py @@ -19,7 +19,7 @@ def log(*args, **kwargs): # bug with mypy: https://github.com/python/mypy/issues/1153 class TimedLog: # type: ignore - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: pass def __enter__(self): diff --git a/captum/robust/_core/metrics/min_param_perturbation.py b/captum/robust/_core/metrics/min_param_perturbation.py index 279179ab64..99308727e4 100644 --- a/captum/robust/_core/metrics/min_param_perturbation.py +++ b/captum/robust/_core/metrics/min_param_perturbation.py @@ -51,7 +51,7 @@ def __init__( preproc_fn: Optional[Callable] = None, apply_before_preproc: bool = False, correct_fn: Optional[Callable] = None, - ): + ) -> None: r""" Identifies minimal perturbation based on target variable which causes misclassification (or other incorrect prediction) of target input. 
diff --git a/tests/attr/test_hook_removal.py b/tests/attr/test_hook_removal.py index b23f80f933..ce0d0b3316 100644 --- a/tests/attr/test_hook_removal.py +++ b/tests/attr/test_hook_removal.py @@ -45,7 +45,7 @@ class HookRemovalMode(Enum): class ErrorModule(Module): def __init__( self, - ): + ) -> None: super().__init__() self.relu = torch.nn.ReLU() diff --git a/tests/concept/test_concept.py b/tests/concept/test_concept.py index ab7e81e42a..2efb336a5a 100644 --- a/tests/concept/test_concept.py +++ b/tests/concept/test_concept.py @@ -14,7 +14,7 @@ class CustomIterableDataset(IterableDataset): An auxiliary class for iterating through an image dataset. """ - def __init__(self, get_tensor_from_filename_func, path): + def __init__(self, get_tensor_from_filename_func, path) -> None: r""" Args: diff --git a/tests/helpers/basic_models.py b/tests/helpers/basic_models.py index 84020bae21..f2e5c02292 100644 --- a/tests/helpers/basic_models.py +++ b/tests/helpers/basic_models.py @@ -16,7 +16,7 @@ class BasicLinearReLULinear(nn.Module): - def __init__(self, in_features, out_features=5, bias=False): + def __init__(self, in_features, out_features=5, bias=False) -> None: super().__init__() self.fc1 = nn.Linear(in_features, out_features, bias=bias) self.relu1 = nn.ReLU() @@ -30,7 +30,7 @@ def forward(self, x): class MixedKwargsAndArgsModule(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() def forward(self, x, y=None): @@ -135,7 +135,7 @@ def forward(self, x1, x2): class BasicLinearModel2(nn.Module): - def __init__(self, in_features, out_features): + def __init__(self, in_features, out_features) -> None: super().__init__() self.linear = nn.Linear(in_features, out_features, bias=False) @@ -144,7 +144,7 @@ def forward(self, input): class BasicLinearModel_Multilayer(nn.Module): - def __init__(self, in_features, hidden_nodes, out_features): + def __init__(self, in_features, hidden_nodes, out_features) -> None: super().__init__() self.linear1 = 
nn.Linear(in_features, hidden_nodes, bias=False) self.linear2 = nn.Linear(hidden_nodes, out_features, bias=False) @@ -433,7 +433,7 @@ def forward(self, x1: Tensor, x2: Tensor, x3: Tensor, scale: int): class BasicModel_MultiLayer_TrueMultiInput(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self.m1 = BasicModel_MultiLayer() self.m234 = BasicModel_MultiLayer_MultiInput() diff --git a/tests/influence/_core/test_similarity_influence.py b/tests/influence/_core/test_similarity_influence.py index 4477e57094..ec08bf6cf1 100644 --- a/tests/influence/_core/test_similarity_influence.py +++ b/tests/influence/_core/test_similarity_influence.py @@ -13,7 +13,7 @@ class BasicLinearNet(nn.Module): - def __init__(self, num_features): + def __init__(self, num_features) -> None: super().__init__() self.fc1 = nn.Linear(num_features, 5, bias=False) self.fc1.weight.data.fill_(0.02) @@ -29,7 +29,7 @@ def forward(self, x): class RangeDataset(Dataset): - def __init__(self, low, high, num_features): + def __init__(self, low, high, num_features) -> None: self.samples = ( torch.arange(start=low, end=high, dtype=torch.float) .repeat(num_features, 1) diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py index 5d7cd3d5a0..90f14353c7 100644 --- a/tests/influence/_utils/common.py +++ b/tests/influence/_utils/common.py @@ -26,7 +26,7 @@ def isSorted(x, key=lambda x: x, descending=True): class ExplicitDataset(Dataset): - def __init__(self, samples, labels): + def __init__(self, samples, labels) -> None: self.samples, self.labels = samples, labels def __len__(self): @@ -37,7 +37,7 @@ def __getitem__(self, idx): class UnpackDataset(Dataset): - def __init__(self, samples, labels): + def __init__(self, samples, labels) -> None: self.samples, self.labels = samples, labels def __len__(self): @@ -52,13 +52,13 @@ def __getitem__(self, idx): class IdentityDataset(ExplicitDataset): - def __init__(self, num_features): + def __init__(self, 
num_features) -> None: self.samples = torch.diag(torch.ones(num_features)) self.labels = torch.zeros(num_features).unsqueeze(1) class RangeDataset(ExplicitDataset): - def __init__(self, low, high, num_features): + def __init__(self, low, high, num_features) -> None: self.samples = ( torch.arange(start=low, end=high, dtype=torch.float) .repeat(num_features, 1) @@ -68,7 +68,7 @@ def __init__(self, low, high, num_features): class BinaryDataset(ExplicitDataset): - def __init__(self): + def __init__(self) -> None: self.samples = F.normalize( torch.stack( ( @@ -108,7 +108,7 @@ def __init__(self): class CoefficientNet(nn.Module): - def __init__(self, in_features=1): + def __init__(self, in_features=1) -> None: super().__init__() self.fc1 = nn.Linear(in_features, 1, bias=False) self.fc1.weight.data.fill_(0.01) @@ -119,7 +119,7 @@ def forward(self, x): class BasicLinearNet(nn.Module): - def __init__(self, in_features, hidden_nodes, out_features): + def __init__(self, in_features, hidden_nodes, out_features) -> None: super().__init__() self.linear1 = nn.Linear(in_features, hidden_nodes) self.linear2 = nn.Linear(hidden_nodes, out_features) @@ -130,7 +130,7 @@ def forward(self, input): class MultLinearNet(nn.Module): - def __init__(self, in_features, hidden_nodes, out_features, num_inputs): + def __init__(self, in_features, hidden_nodes, out_features, num_inputs) -> None: super().__init__() self.pre = nn.Linear(in_features * num_inputs, in_features) self.linear1 = nn.Linear(in_features, hidden_nodes) @@ -206,7 +206,7 @@ class DataInfluenceConstructor: def __init__( self, data_influence_class: type, name: Optional[str] = None, **kwargs - ): + ) -> None: self.data_influence_class = data_influence_class self.name = name if name else data_influence_class.__name__ self.kwargs = kwargs diff --git a/tests/robust/test_attack_comparator.py b/tests/robust/test_attack_comparator.py index 494fe2f649..2b356455f8 100644 --- a/tests/robust/test_attack_comparator.py +++ 
b/tests/robust/test_attack_comparator.py @@ -51,7 +51,7 @@ def string_batch_perturb(inp: List[List[str]]) -> List[List[str]]: class SamplePerturb: - def __init__(self): + def __init__(self) -> None: self.count = 0 def perturb(self, inp: Tensor) -> Tensor: diff --git a/tests/utils/test_av.py b/tests/utils/test_av.py index d5d4e2b92c..956bcd34de 100644 --- a/tests/utils/test_av.py +++ b/tests/utils/test_av.py @@ -13,7 +13,7 @@ class RangeDataset(Dataset): - def __init__(self, low, high, num_features): + def __init__(self, low, high, num_features) -> None: self.samples = ( torch.arange(start=low, end=high, dtype=torch.float) .repeat(num_features, 1) From 8b82a37959822d60669f5db9b700b348483a6361 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 16 Jun 2022 12:13:04 -0600 Subject: [PATCH 038/174] Add alias for ImageTensor.open() --- captum/optim/_param/image/images.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 3fade94f64..e4c4c05210 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -69,6 +69,11 @@ def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTenso img_np = np.array(img.convert(mode)).astype(np.float32) return cls(img_np.transpose(2, 0, 1) / scale) + @classmethod + def load(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": + """Alias of ImageTensor.open()""" + return cls.open(path=path, scale=scale, mode=mode) + def __repr__(self) -> str: prefix = "ImageTensor(" indent = len(prefix) From afc4759083b29ace64c5a80481ad5306ad41eab4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 20 Jun 2022 12:08:05 -0700 Subject: [PATCH 039/174] Update minimum PyTorch version in README (#977) Summary: It should be >= 1.6, not >= 1.2. 
Pull Request resolved: https://github.com/pytorch/captum/pull/977 Reviewed By: vivekmig Differential Revision: D37270407 Pulled By: NarineK fbshipit-source-id: cd51a5e5f8665143c4171be001675d624b6a60b3 --- README.md | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 801fa4d23a..5f415f7e0a 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Captum can also be used by application engineers who are using trained models in **Installation Requirements** - Python >= 3.6 -- PyTorch >= 1.2 +- PyTorch >= 1.6 ##### Installing the latest release diff --git a/environment.yml b/environment.yml index cd9c40927c..61de9e0096 100644 --- a/environment.yml +++ b/environment.yml @@ -3,4 +3,4 @@ channels: - pytorch dependencies: - numpy - - pytorch>=1.2 + - pytorch>=1.6 From 857f26c07eab76344543624b5ce20d2b85ec4ee1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 20 Jun 2022 14:54:10 -0600 Subject: [PATCH 040/174] Add CompositeLoss to __all__ --- captum/optim/_core/loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 57b63ebc1c..fa0808a981 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1113,6 +1113,7 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: "Loss", "loss_wrapper", "BaseLoss", + "CompositeLoss", "LayerActivation", "ChannelActivation", "NeuronActivation", From 9e9a6beb0cf7f5a8f4d55225d087e2240f0c1953 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 20 Jun 2022 18:49:17 -0600 Subject: [PATCH 041/174] Add Conv2dSame to __all__ --- captum/optim/models/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/captum/optim/models/__init__.py b/captum/optim/models/__init__.py index 687aab0f85..121fa09257 100755 --- a/captum/optim/models/__init__.py +++ b/captum/optim/models/__init__.py @@ -6,6 +6,7 @@ get_model_layers, replace_layers, skip_layers, + Conv2dSame, ) from 
._image.inception5h_classes import INCEPTION5H_CLASSES # noqa: F401 from ._image.inception_v1 import InceptionV1, googlenet # noqa: F401 @@ -18,6 +19,7 @@ ) __all__ = [ + "Conv2dSame", "MaxPool2dRelaxed", "RedirectedReluLayer", "SkipLayer", From 027038381e18c68cd4838039448290de472f1864 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 20 Jun 2022 20:05:05 -0600 Subject: [PATCH 042/174] Fix doc formatting for Sphinx --- captum/optim/_utils/image/atlas.py | 2 ++ captum/optim/_utils/image/common.py | 7 +++++++ captum/optim/_utils/reducer.py | 6 ++++++ captum/optim/models/_common.py | 17 +++++++++++++---- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/captum/optim/_utils/image/atlas.py b/captum/optim/_utils/image/atlas.py index 5954a3a471..dd68bccc64 100644 --- a/captum/optim/_utils/image/atlas.py +++ b/captum/optim/_utils/image/atlas.py @@ -78,6 +78,7 @@ def calc_grid_indices( ] Args: + xy_grid (torch.tensor): The xy coordinate grid activation samples, with a shape of: [n_points, 2]. grid_size (Tuple[int, int]): The grid_size of grid cells to use. The grid_size @@ -86,6 +87,7 @@ def calc_grid_indices( Default: (0.0, 1.0) y_extent (Tuple[float, float], optional): The y axis range to use. Default: (0.0, 1.0) + Returns: indices (list of list of torch.Tensors): List of lists of grid indices stored inside tensors to use. Each 1D tensor of indices has a size of: diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 39a6ada5ea..77da453678 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -208,6 +208,7 @@ def _dot_cossim( a specified dimension. Args: + x (torch.Tensor): The tensor that you wish to compute the cosine similarity for in relation to tensor y. y (torch.Tensor): The tensor that you wish to compute the cosine similarity @@ -216,6 +217,7 @@ def _dot_cossim( dim (int, optional): The target dimension for computing cosine similarity. 
eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. + Returns: tensor (torch.Tensor): Dot cosine similarity between x and y, along the specified dim. @@ -241,13 +243,16 @@ def hue_to_rgb( ) -> torch.Tensor: """ Create an RGB unit vector based on a hue of the input angle. + Args: + angle (float): The hue angle to create an RGB color for. device (torch.device, optional): The device to create the angle color tensor on. Default: torch.device("cpu") warp (bool, optional): Whether or not to make colors more distinguishable. Default: True + Returns: color_vec (torch.Tensor): A color vector. """ @@ -293,6 +298,7 @@ def nchannels_to_rgb( Default: True eps (float, optional): An optional epsilon value. Default: 1e-4 + Returns: tensor (torch.Tensor): An NCHW RGB image tensor. """ @@ -326,6 +332,7 @@ def weights_to_heatmap_2d( no excitation or inhibition. Args: + weight (torch.Tensor): A 2d tensor to create the heatmap from. colors (list of str): A list of 5 strings containing hex triplet (six digit), three-byte hexadecimal color values to use for coloring diff --git a/captum/optim/_utils/reducer.py b/captum/optim/_utils/reducer.py index 2696d003d6..585d0157e0 100644 --- a/captum/optim/_utils/reducer.py +++ b/captum/optim/_utils/reducer.py @@ -22,6 +22,7 @@ class ChannelReducer: See here for more information: https://distill.pub/2018/building-blocks/ Args: + n_components (int, optional): The number of channels to reduce the target dimension to. reduction_alg (str or callable, optional): The desired dimensionality @@ -71,11 +72,14 @@ def fit_transform( ) -> torch.Tensor: """ Perform dimensionality reduction on an input tensor. + Args: + tensor (tensor): A tensor to perform dimensionality reduction on. swap_2nd_and_last_dims (bool, optional): If true, input channels are expected to be in the second dimension unless the input tensor has a shape of CHW. Default is set to True. 
+ Returns: *tensor*: A tensor with one of it's dimensions reduced. """ @@ -131,8 +135,10 @@ def posneg(x: torch.Tensor, dim: int = 0) -> torch.Tensor: NMF with regular NMF. Args: + x (tensor): A tensor to make positive. dim (int, optional): The dimension to concatinate the two tensor halves at. + Returns: tensor (torch.tensor): A positive tensor for one-sided dimensionality reduction. diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index e65e281217..2e4352738b 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -112,13 +112,16 @@ def _transfer_layer_vars( """ Given a layer instance, create a new layer instance of another class with the same initialization variables as the original layer. + Args: + layer1: (nn.Module): A layer instance that you want to transfer initialization variables from. layer2: (nn.Module): The layer class to create with the variables from of layer1. kwargs: (Any, optional): Any additional variables to use when creating the new layer. + Returns: layer2 instance (nn.Module): An instance of layer2 with the initialization variables that it shares with layer1, and any specified additional @@ -273,13 +276,15 @@ class SkipLayer(torch.nn.Module): See nn.Identity for more details: https://pytorch.org/docs/stable/generated/torch.nn.Identity.html - - Args: - args (Any): Any argument. Arguments will be safely ignored. - kwargs (Any) Any keyword argument. Arguments will be safely ignored. """ def __init__(self, *args, **kwargs) -> None: + """ + Args: + + args (Any): Any argument. Arguments will be safely ignored. + kwargs (Any) Any keyword argument. Arguments will be safely ignored. + """ super().__init__() def forward( @@ -287,9 +292,11 @@ def forward( ) -> Union[torch.Tensor, Tuple[torch.Tensor]]: """ Args: + x (torch.Tensor or tuple of torch.Tensor): The input tensor or tensors. args (Any): Any argument. Arguments will be safely ignored. kwargs (Any) Any keyword argument. 
Arguments will be safely ignored. + Returns: x (torch.Tensor or tuple of torch.Tensor): The unmodified input tensor or tensors. @@ -306,7 +313,9 @@ def skip_layers( with layers that do nothing. This is useful for removing the nonlinear ReLU layers when creating expanded weights. + Args: + model (nn.Module): A PyTorch model instance. layers (nn.Module or list of nn.Module): The layer class type to replace in the model. From 73eedd11468ce8a50d2a5e34c9ce0d9b0da93563 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 23 Jun 2022 08:59:12 -0600 Subject: [PATCH 043/174] Fix docs for Sphinx --- captum/optim/_core/optimization.py | 21 ++++++++++++++++++- captum/optim/models/_common.py | 9 ++++---- .../models/_image/inception_v1_places365.py | 4 ++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index cd11db9e34..ae5a78e652 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -35,6 +35,18 @@ class InputOptimization(Objective, Parameterized): For more details, see the following: https://github.com/tensorflow/lucid https://distill.pub/2017/feature-visualization/ + + Instance variables that be used in the optimize function and StopCriteria: + + :ivar model: initial value (nn.Module): The given model instance given when + initializing InputOptimization. + :ivar input_param: initial value (ImageParameterization): The given image + parameterization instance given when initializing InputOptimization. + :ivar loss_fn: initial value (Loss): The given composable loss instance given + when initializing InputOptimization. + :ivar transform: initial value (nn.Module): The given transform instance given + when initializing InputOptimization. If it was set to None during + initialization, then an instance of torch.nn.Identity will be returned. 
""" def __init__( @@ -95,7 +107,9 @@ def loss(self) -> torch.Tensor: return loss_value def cleanup(self) -> None: - r"""Garbage collection, mainly removing hooks.""" + r"""Garbage collection, mainly removing hooks. + This should only be run after optimize is finished running. + """ self.hooks.remove_hooks() # Targets are managed by ModuleOutputHooks; we mainly just want a convenient setter @@ -109,6 +123,11 @@ def targets(self, value: Iterable[nn.Module]) -> None: self.hooks = ModuleOutputsHook(value) def parameters(self) -> Iterable[nn.Parameter]: + """ + Returns: + parameters (iterable of nn.Parameter): An iterable of parameters in the + image parameterization. + """ return self.input_param.parameters() def optimize( diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 2e4352738b..c9af0dc73f 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -147,8 +147,7 @@ def _transfer_layer_vars( class Conv2dSame(nn.Conv2d): """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions. - TODO: Replace with torch.nn.Conv2d when support for padding='same' - is in stable version + torch.nn.Conv2d with padding='same' can be used when the stride is equal to 1. """ def __init__( @@ -190,7 +189,7 @@ def __init__( in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias ) - def calc_same_pad(self, i: int, k: int, s: int, d: int) -> int: + def _calc_same_pad(self, i: int, k: int, s: int, d: int) -> int: """ Calculate the required padding for a dimension. 
@@ -217,8 +216,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: """ ih, iw = x.size()[-2:] kh, kw = self.weight.size()[-2:] - pad_h = self.calc_same_pad(i=ih, k=kh, s=self.stride[0], d=self.dilation[0]) - pad_w = self.calc_same_pad(i=iw, k=kw, s=self.stride[1], d=self.dilation[1]) + pad_h = self._calc_same_pad(i=ih, k=kh, s=self.stride[0], d=self.dilation[0]) + pad_w = self._calc_same_pad(i=iw, k=kw, s=self.stride[1], d=self.dilation[1]) if pad_h > 0 or pad_w > 0: x = F.pad( diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index 5ebca2a9b5..85afc7b32d 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -24,6 +24,7 @@ def googlenet_places365( dataset. See here for more information: https://arxiv.org/abs/1610.02055 Args: + pretrained (bool, optional): If True, returns a model pre-trained on the MIT Places365 Standard dataset. Default: False @@ -47,6 +48,9 @@ def googlenet_places365( transform_input (bool, optional): If True, preprocesses the input according to the method with which it was trained on Places365. Default: True + + Returns: + **model** (InceptionV1Places365): An InceptionV1 Places365 model instance. """ if pretrained: From c45f6944995c33612995793ad612e53045941736 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 23 Jun 2022 11:37:01 -0600 Subject: [PATCH 044/174] Minor fixes --- captum/optim/_core/optimization.py | 4 +-- tests/optim/core/test_optimization.py | 46 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index ae5a78e652..68310087f6 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -42,8 +42,8 @@ class InputOptimization(Objective, Parameterized): initializing InputOptimization. 
:ivar input_param: initial value (ImageParameterization): The given image parameterization instance given when initializing InputOptimization. - :ivar loss_fn: initial value (Loss): The given composable loss instance given - when initializing InputOptimization. + :ivar loss_function: initial value (Loss): The given composable loss instance + given when initializing InputOptimization. :ivar transform: initial value (nn.Module): The given transform instance given when initializing InputOptimization. If it was set to None during initialization, then an instance of torch.nn.Identity will be returned. diff --git a/tests/optim/core/test_optimization.py b/tests/optim/core/test_optimization.py index 7f77cf4b4d..c0def7ffef 100644 --- a/tests/optim/core/test_optimization.py +++ b/tests/optim/core/test_optimization.py @@ -9,6 +9,52 @@ class TestInputOptimization(BaseTest): + def test_input_optimization_init(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping InputOptimization init test due to insufficient Torch" + + " version." + ) + model = BasicModel_ConvNet_Optim() + loss_fn = opt.loss.ChannelActivation(model.layer, 1) + transform = torch.nn.Identity() + image_param = opt.images.NaturalImage() + obj = opt.InputOptimization( + model, loss_function=loss_fn, input_param=image_param, transform=transform + ) + + self.assertEqual(model, obj.model) + self.assertEqual(image_param, obj.input_param) + self.assertEqual(transform, obj.transform) + self.assertEqual(loss_fn, obj.loss_function) + self.assertEqual(list(image_param.parameters()), list(obj.parameters())) + + def test_input_optimization_custom_optimize(self) -> torch.Tensor: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping InputOptimization custom optimze test due to insufficient" + + " Torch version." 
+ ) + model = BasicModel_ConvNet_Optim() + loss_fn = opt.loss.ChannelActivation(model.layer, 0) + obj = opt.InputOptimization(model, loss_function=loss_fn) + + stop_criteria = opt.optimization.n_steps(512) + optimizer = torch.optim.Adam(obj.parameters(), lr=0.02) + + history, step = [], 0 + try: + while stop_criteria(step, obj, history, optimizer): + optimizer.zero_grad() + loss_value = -1.0 * obj.loss().mean() + history.append(loss_value.clone().detach()) + loss_value.backward() + optimizer.step() + step += 1 + finally: + obj.cleanup() + self.assertIsInstance(torch.stack(history), torch.Tensor) + def test_input_optimization(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( From 650927e5543e8285ab9607a6f66d21d7b4b8ed53 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 23 Jun 2022 11:37:35 -0600 Subject: [PATCH 045/174] Add missing input_param attribute to InputOptimization info --- tutorials/optimviz/CustomModules_OptimViz.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/tutorials/optimviz/CustomModules_OptimViz.ipynb b/tutorials/optimviz/CustomModules_OptimViz.ipynb index ae556a1b0b..0bfe58ce15 100644 --- a/tutorials/optimviz/CustomModules_OptimViz.ipynb +++ b/tutorials/optimviz/CustomModules_OptimViz.ipynb @@ -309,6 +309,7 @@ "* The `.parameters()` function returns the list of input parameters requiring grad.\n", "* The `.loss()` function returns the loss function values.\n", "* The `.cleanup()` function removes the hooks that were used to collect activations.\n", + "* The image parameterization being used can be accessed via `.input_param` attribute.\n", "* The model being used can be accessed via `.model` attribute.\n", "* The transforms being used can be accessed via `.transforms` attribute." 
], From 4cf8cfc8033951a358d3759d8ab16437143c09d4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 23 Jun 2022 12:07:00 -0600 Subject: [PATCH 046/174] Fix test errors --- tests/optim/core/test_optimization.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/optim/core/test_optimization.py b/tests/optim/core/test_optimization.py index c0def7ffef..e7b174e480 100644 --- a/tests/optim/core/test_optimization.py +++ b/tests/optim/core/test_optimization.py @@ -29,7 +29,7 @@ def test_input_optimization_init(self) -> None: self.assertEqual(loss_fn, obj.loss_function) self.assertEqual(list(image_param.parameters()), list(obj.parameters())) - def test_input_optimization_custom_optimize(self) -> torch.Tensor: + def test_input_optimization_custom_optimize(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( "Skipping InputOptimization custom optimze test due to insufficient" @@ -39,7 +39,7 @@ def test_input_optimization_custom_optimize(self) -> torch.Tensor: loss_fn = opt.loss.ChannelActivation(model.layer, 0) obj = opt.InputOptimization(model, loss_function=loss_fn) - stop_criteria = opt.optimization.n_steps(512) + stop_criteria = opt.optimization.n_steps(512, show_progress=False) optimizer = torch.optim.Adam(obj.parameters(), lr=0.02) history, step = [], 0 @@ -53,7 +53,8 @@ def test_input_optimization_custom_optimize(self) -> torch.Tensor: step += 1 finally: obj.cleanup() - self.assertIsInstance(torch.stack(history), torch.Tensor) + history = torch.stack(history) + self.assertIsInstance(history, torch.Tensor) def test_input_optimization(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): From 90f9592c32a8d18197ee03264f019c5d98c80af7 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 23 Jun 2022 12:22:40 -0600 Subject: [PATCH 047/174] Fix mypy error --- tests/optim/core/test_optimization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/tests/optim/core/test_optimization.py b/tests/optim/core/test_optimization.py index e7b174e480..1cd3301a98 100644 --- a/tests/optim/core/test_optimization.py +++ b/tests/optim/core/test_optimization.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import unittest +from typing import List import captum.optim as opt import torch @@ -42,7 +43,8 @@ def test_input_optimization_custom_optimize(self) -> None: stop_criteria = opt.optimization.n_steps(512, show_progress=False) optimizer = torch.optim.Adam(obj.parameters(), lr=0.02) - history, step = [], 0 + history: List[torch.Tensor] = [] + step = 0 try: while stop_criteria(step, obj, history, optimizer): optimizer.zero_grad() From 7d77c7220620ad2c416ac30e6d8b24640397c5a8 Mon Sep 17 00:00:00 2001 From: Vivek Miglani Date: Thu, 23 Jun 2022 18:26:59 -0700 Subject: [PATCH 048/174] Add ODS Logging to Captum (#971) Summary: Pull Request resolved: https://github.com/pytorch/captum/pull/971 Reviewed By: NarineK Differential Revision: D37009629 fbshipit-source-id: 161a957ed56abfb734c9004fc8420e66ccde9d20 --- captum/log/__init__.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/captum/log/__init__.py b/captum/log/__init__.py index 8c0b7472ad..93a5e471fb 100644 --- a/captum/log/__init__.py +++ b/captum/log/__init__.py @@ -2,6 +2,7 @@ try: from captum.log.fb.internal_log import ( + disable_detailed_logging, log, log_usage, patch_methods, @@ -9,7 +10,13 @@ TimedLog, ) - __all__ = ["log", "log_usage", "TimedLog", "set_environment"] + __all__ = [ + "log", + "log_usage", + "TimedLog", + "set_environment", + "disable_detailed_logging", + ] except ImportError: from functools import wraps @@ -41,5 +48,8 @@ def wrapper(*args, **kwargs): def set_environment(env): pass + def disable_detailed_logging(): + pass + def patch_methods(tester, patch_log=True): pass From 5e3a80fa76a5d91f015776bcb60f9615494ef946 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 24 Jun 2022 13:30:27 -0600 Subject: [PATCH 049/174] Add docs 
for loss_fn in sum_loss_list --- captum/optim/_core/loss.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index fa0808a981..f4cb3a6d89 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1076,6 +1076,20 @@ def sum_loss_list( """ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: + """ + Pass collected activations through the list of loss objectives based on + specified targets, and then apply a reduction op to reduce them to scalar + before adding them together. + + Args: + + module (ModuleOutputMapping): A dict of captured activations with + nn.Modules as keys. + + Returns: + loss (torch.Tensor): The target activations after being run through the + loss objectives, and then added together. + """ return sum([to_scalar_fn(loss(module)) for loss in loss_list]) name = "Sum(" + ", ".join([loss.__name__ for loss in loss_list]) + ")" From 613baa99a92bb5d3824715ea44bb52a9ec83db0c Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 25 Jun 2022 09:22:42 -0600 Subject: [PATCH 050/174] Add docs for loss testing helper --- tests/optim/core/test_loss.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 0ba3651175..9fc8f67be9 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -19,6 +19,21 @@ def get_loss_value( loss: opt_loss.Loss, model_input: Union[List[int], torch.Tensor] = [1, 3, 1, 1], ) -> torch.Tensor: + """ + Collect target activations and pass them through a composable loss instance. + + Args: + + model (nn.Module): A PyTorch model instance. + loss (Loss): A composable loss instance that uses targets from the provided + model instance. + model_input (list of int or torch.Tensor): A list of integers to use for the + shape of the model input, or a tensor to use as the model input. 
+ Default: [1, 3, 1, 1] + + Returns: + loss (torch.Tensor): The target activations run through the loss objectives. + """ if isinstance(model_input, (list, tuple)): model_input = torch.ones(*model_input) else: From 6f41b207e4bd0f27657b3ecffd3526ba66d96a55 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 25 Jun 2022 17:22:38 -0600 Subject: [PATCH 051/174] Remove loss_wrapper requirement for loss objectives --- captum/optim/_core/loss.py | 1 + tests/optim/core/test_loss.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1365537c19..2857ff7e7a 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -24,6 +24,7 @@ class Loss(ABC): def __init__(self) -> None: super(Loss, self).__init__() + self.__name__ = self.__class__.__name__ @abstractproperty def target(self) -> Union[nn.Module, List[nn.Module]]: diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index 9fc8f67be9..cbf99912d0 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -532,6 +532,7 @@ def test_loss_init(self) -> None: with _OverrideAbstractFunctions(opt_loss.Loss): loss = opt_loss.Loss() # type: ignore self.assertIsNone(loss.target) + self.assertEqual(loss.__name__, "Loss") self.assertEqual(opt_loss.Loss.__name__, "Loss") @@ -547,6 +548,8 @@ def test_base_loss_init(self) -> None: self.assertEqual(loss.batch_index, (None, None)) self.assertEqual(loss._target, model) self.assertEqual(loss.target, model) + self.assertEqual(loss.__name__, "BaseLoss") + self.assertEqual(opt_loss.BaseLoss.__name__, "BaseLoss") def test_base_loss_batch_index(self) -> None: model = torch.nn.Identity() From 5f849aa4d1dd9ba9dd6cc367be0a70b8e759e8f4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 26 Jun 2022 11:57:56 -0600 Subject: [PATCH 052/174] Fix Sphinx loss doc duplication bug --- captum/optim/_core/loss.py | 102 ++++++++++++++++++++++++------------- 1 file changed, 67 
insertions(+), 35 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index f4cb3a6d89..4657b23f8c 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -303,17 +303,25 @@ class LayerActivation(BaseLoss): Maximize activations at the target layer. This is the most basic loss available and it simply returns the activations in their original form. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -430,17 +438,25 @@ class DeepDream(BaseLoss): This loss returns the squared layer activations. When combined with a negative mean loss summarization, this loss will create hallucinogenic visuals commonly referred to as 'Deep Dream'. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. 
- batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -457,17 +473,25 @@ class TotalVariation(BaseLoss): This loss attempts to smooth / denoise the target by performing total variance denoising. The target is most often the image that’s being optimized. This loss is often used to remove unwanted visual artifacts. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[Union[int, List[int]]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of - activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (int or list of int, optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to None, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. 
+ Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] @@ -559,16 +583,24 @@ class Diversity(BaseLoss): This loss helps break up polysemantic layers, channels, and neurons by encouraging diversity across the different batches. This loss is to be used along with a main loss. + """ - Args: + def __init__( + self, + target: nn.Module, + batch_index: Optional[List[int]] = None, + ) -> None: + """ + Args: - target (nn.Module): A target layer, transform, or image parameterization - instance to optimize the output of. - batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. Index - ranges should be in the format of: [start, end]. - Default: None - """ + target (nn.Module): A target layer, transform, or image parameterization + instance to optimize the output of. + batch_index (list of int, optional): The index range of activations to + optimize. If set to None, defaults to all activations in the batch. + Index ranges should be in the format of: [start, end]. + Default: None + """ + BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: activations = targets_to_values[self.target] From ca3b5f970242de43c5d7569ae599883150c8d69d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 13:23:40 -0600 Subject: [PATCH 053/174] Update _common.py --- captum/optim/models/_common.py | 36 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index c9af0dc73f..d0a1d81208 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -87,10 +87,11 @@ def replace_layers( layer1: (Type[nn.Module]): The layer class that you want to transfer initialization variables from. 
layer2: (Type[nn.Module]): The layer class to create with the variables - from layer1. - transfer_vars (bool, optional): Wether or not to try and copy - initialization variables from layer1 instances to the replacement - layer2 instances. + from ``layer1``. + transfer_vars (bool, optional): Whether or not to try and copy + initialization variables from ``layer1`` instances to the replacement + ``layer2`` instances. + Default: ``False`` kwargs: (Any, optional): Any additional variables to use when creating the new layer. """ @@ -172,18 +173,19 @@ def __init__( kernel_size (int or tuple of int): The desired kernel size to use. stride (int or tuple of int, optional): The desired stride for the cross-correlation. - Default: 1 + Default: ``1`` padding (int or tuple of int, optional): This value is always set to 0. - Default: 0 + Default: ``0`` dilation (int or tuple of int, optional): The desired spacing between the kernel points. - Default: 1 + Default: ``1`` groups (int, optional): Number of blocked connections from input channels to output channels. Both in_channels and out_channels must be divisable by groups. - Default: 1 + Default: ``1`` bias (bool, optional): Whether or not to apply a learnable bias to the output. + Default: ``True`` """ super().__init__( in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias @@ -249,7 +251,7 @@ def collect_activations( given model. model_input (torch.Tensor or tuple of torch.Tensor, optional): Optionally provide an input tensor to use when collecting the target activations. - Default: torch.zeros(1, 3, 224, 224) + Default: ``torch.zeros(1, 3, 224, 224)`` Returns: activ_dict (ModuleOutputMapping): A dictionary of collected activations where @@ -269,9 +271,9 @@ class SkipLayer(torch.nn.Module): during the forward pass. Use cases include removing nonlinear activation layers like ReLU for circuits research. 
- This layer works almost exactly the same way that nn.Indentiy does, except it also - ignores any additional arguments passed to the forward function. Any layer replaced - by SkipLayer must have the same input and output shapes. + This layer works almost exactly the same way that ``nn.Identity`` does, except it + also ignores any additional arguments passed to the forward function. Any layer + replaced by ``SkipLayer`` must have the same input and output shapes. See nn.Identity for more details: https://pytorch.org/docs/stable/generated/torch.nn.Identity.html @@ -365,15 +367,15 @@ def __init__( Args: kernel_size (int or tuple of int): The size of the window to perform max & - average pooling with. + average pooling with. stride (int or tuple of int, optional): The stride window size to use. - Default: None + Default: ``None`` padding (int or tuple of int): The amount of zero padding to add to both - sides in the nn.MaxPool2d & nn.AvgPool2d modules. - Default: 0 + sides in the ``nn.MaxPool2d`` & ``nn.AvgPool2d`` modules. + Default: ``0`` ceil_mode (bool, optional): Whether to use ceil or floor for creating the output shape. 
- Default: False + Default: ``False`` """ super().__init__() self.maxpool = torch.nn.MaxPool2d( From e87c975531532f8e5dd1bf55d10612b3814369c8 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 15:19:09 -0600 Subject: [PATCH 054/174] Improve ImageTensor, Optimization, & submodule docs for Sphinx --- captum/optim/_core/optimization.py | 47 +++++++---- captum/optim/_param/image/images.py | 81 ++++++++++++------- captum/optim/_utils/image/dataset.py | 28 ++++--- .../models/_image/inception_v1_places365.py | 79 +++++++++--------- 4 files changed, 141 insertions(+), 94 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 68310087f6..4072b0f987 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -36,17 +36,29 @@ class InputOptimization(Objective, Parameterized): https://github.com/tensorflow/lucid https://distill.pub/2017/feature-visualization/ - Instance variables that be used in the optimize function and StopCriteria: + Example:: + + >>> model = opt.models.googlenet(pretrained=True) + >>> loss_fn = opt.loss.LayerActivation(model.mixed4c) + >>> image = opt.images.NaturalImage(size=(224, 224)) + >>> transform = opt.transforms.TransformationRobustness() + >>> + >>> obj = opt.InputOptimization(model, loss_fn, image, transform) + >>> history = obj.optimize(opt.optimization.n_steps(512)) + >>> image().show(figsize=(10, 10)) # Display results + + Instance variables that be used in the optimize function and StopCriteria + functions: :ivar model: initial value (nn.Module): The given model instance given when - initializing InputOptimization. + initializing ``InputOptimization``. :ivar input_param: initial value (ImageParameterization): The given image - parameterization instance given when initializing InputOptimization. + parameterization instance given when initializing ``InputOptimization``. 
:ivar loss_function: initial value (Loss): The given composable loss instance - given when initializing InputOptimization. + given when initializing ``InputOptimization``. :ivar transform: initial value (nn.Module): The given transform instance given - when initializing InputOptimization. If it was set to None during - initialization, then an instance of torch.nn.Identity will be returned. + when initializing ``InputOptimization``. If it was set to ``None`` during + initialization, then an instance of ``torch.nn.Identity`` will be returned. """ def __init__( @@ -142,17 +154,17 @@ def optimize( Args: stop_criteria (StopCriteria, optional): A function that is called - every iteration and returns a bool that determines whether - to stop the optimization. - See captum.optim.typing.StopCriteria for details. - optimizer (Optimizer, optional): An torch.optim.Optimizer used to - optimize the input based on the loss function. + every iteration and returns a bool that determines whether to stop the + optimization. + Default: ``n_steps(512)`` + optimizer (Optimizer, optional): An ``torch.optim.Optimizer`` used to + optimize the input based on the loss function. loss_summarize_fn (Callable, optional): The function to use for summarizing tensor outputs from loss functions. - Default: default_loss_summarize + Default: ``default_loss_summarize`` lr: (float, optional): If no optimizer is given, then lr is used as the learning rate for the Adam optimizer. - Default: 0.025 + Default: ``0.025`` Returns: history (torch.Tensor): A stack of loss values per iteration. The size @@ -182,13 +194,18 @@ def optimize( def n_steps(n: int, show_progress: bool = True) -> StopCriteria: """StopCriteria generator that uses number of steps as a stop criteria. + Example:: + + >>> stop_criteria = opt.optimization.n_steps(512, True) + Args: + n (int): Number of steps to run optimization. show_progress (bool, optional): Whether or not to show progress bar. 
- Default: True + Default: ``True`` Returns: - *StopCriteria* callable + *StopCriteria* (callable): A stop criteria function. """ if show_progress: diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index e4c4c05210..64400f24ff 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -21,6 +21,27 @@ class ImageTensor(torch.Tensor): + r""" + A subclass of torch.Tensor that provides functions for easy loading, saving, and + displaying image tensors. + + Example using file path or URL:: + + >>> image_tensor = opt.images.ImageTensor.load() + >>> image_tensor.export(filename="image_tensor.jpg") # Save image(s) + >>> image_tensor.show(figsize=(8, 8)) # Displays image(s) via Matplotlib + + Example using ``torch.Tensor``:: + + >>> image_tensor = torch.randn(1, 3, 224, 224) + >>> image_tensor = opt.images.ImageTensor(image_tensor) + + Example using ``np.ndarray``:: + + >>> image_tensor = np.random.rand(1, 3, 224, 224) + >>> image_tensor = opt.images.ImageTensor(image_tensor) + """ + @staticmethod def __new__( cls: Type["ImageTensor"], @@ -32,10 +53,10 @@ def __new__( Args: x (list or np.ndarray or torch.Tensor): A list, NumPy array, or PyTorch - tensor to create an `ImageTensor` from. + tensor to create an ``ImageTensor`` from. Returns: - x (ImageTensor): An `ImageTensor` instance. + x (ImageTensor): An ``ImageTensor`` instance. """ if isinstance(x, torch.Tensor) and x.is_cuda: x.show = MethodType(cls.show, x) @@ -45,17 +66,18 @@ def __new__( return super().__new__(cls, x, *args, **kwargs) @classmethod - def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": + def load(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": """ - Load an image file from a URL or local filepath directly into an `ImageTensor`. + Load an image file from a URL or local filepath directly into an + ``ImageTensor``. Args: path (str): A URL or filepath to an image. 
scale (float, optional): The image scale to use. - Default: 255.0 + Default: ``255.0`` mode (str, optional): The image loading mode / colorspace to use. - Default: "RGB" + Default: ``"RGB"`` Returns: x (ImageTensor): An `ImageTensor` instance. @@ -70,9 +92,9 @@ def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTenso return cls(img_np.transpose(2, 0, 1) / scale) @classmethod - def load(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": - """Alias of ImageTensor.open()""" - return cls.open(path=path, scale=scale, mode=mode) + def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": + r"""Alias for :func:`load`.""" + return cls.load(path=path, scale=scale, mode=mode) def __repr__(self) -> str: prefix = "ImageTensor(" @@ -109,25 +131,25 @@ def show( pad_value: float = 0.0, ) -> None: """ - Display an `ImageTensor`. + Display an ``ImageTensor`` instance. Args: figsize (Tuple[int, int], optional): height & width to use - for displaying the `ImageTensor` figure. - scale (float, optional): Value to multiply the `ImageTensor` by so that + for displaying the ``ImageTensor`` figure. + scale (float, optional): Value to multiply the ``ImageTensor`` by so that it's value range is [0-255] for display. - Default: 255.0 + Default: ``255.0`` images_per_row (int, optional): The number of images per row to use for the - grid image. Default is set to None for no grid image creation. - Default: None + grid image. Default is set to ``None`` for no grid image creation. + Default: ``None`` padding (int, optional): The amount of padding between images in the grid - images. This parameter only has an effect if `images_per_row` is not - None. - Default: 2 + images. This parameter only has an effect if ``images_per_row`` is not + ``None``. + Default: ``2`` pad_value (float, optional): The value to use for the padding. This - parameter only has an effect if `images_per_row` is not None. 
- Default: 0.0 + parameter only has an effect if ``images_per_row`` is not None. + Default: ``0.0`` """ show( self, @@ -152,23 +174,24 @@ def export( Args: - filename (str): The filename to use when saving the `ImageTensor` as an + filename (str): The filename to use when saving the ``ImageTensor`` as an image file. - scale (float, optional): Value to multiply the `ImageTensor` by so that + scale (float, optional): Value to multiply the ``ImageTensor`` by so that it's value range is [0-255] for saving. - Default: 255.0 + Default: ``255.0`` mode (str, optional): A PIL / Pillow supported colorspace. Default is set to None for automatic RGB / RGBA detection and usage. - Default: None + Default: ``None`` images_per_row (int, optional): The number of images per row to use for the grid image. Default is set to None for no grid image creation. - Default: None + Default: ``None`` padding (int, optional): The amount of padding between images in the grid - images. This parameter only has an effect if `images_per_row` is not - None. + images. This parameter only has an effect if ``images_per_row`` is not + ``None``. + Default: ``2`` pad_value (float, optional): The value to use for the padding. This - parameter only has an effect if `images_per_row` is not None. - Default: 0.0 + parameter only has an effect if ``images_per_row`` is not ``None``. + Default: ``0.0`` """ save_tensor_as_image( self, diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 66bf18b53a..66eee6dc38 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -19,11 +19,11 @@ def image_cov(x: torch.Tensor) -> torch.Tensor: Args: - x (torch.Tensor): One or more NCHW image tensors stacked across the batch + x (torch.Tensor): One or more NCHW image tensors stacked across the batch dimension. 
Returns: - *tensor* (torch.Tensor): The average color channel covariance matrix for the + *tensor* (torch.Tensor): The average color channel covariance matrix for the for the input tensor, with a shape of: [n_channels, n_channels]. """ @@ -47,10 +47,10 @@ def dataset_cov_matrix( loader (torch.utils.data.DataLoader): The reference to a PyTorch dataloader instance. show_progress (bool, optional): Whether or not to display a tqdm progress bar. - Default: False + Default: ``False`` device (torch.device, optional): The PyTorch device to use for for calculating the cov matrix. - Default: torch.device("cpu") + Default: ``torch.device("cpu")`` Returns: *tensor*: A covariance matrix for the specified dataset. @@ -91,10 +91,12 @@ def cov_matrix_to_klt( Args: - cov_mtx (tensor): A 3 by 3 covariance matrix generated from a dataset. - normalize (bool): Whether or not to normalize the resulting KLT matrix. - Default: False - epsilon (float): + cov_mtx (tensor): A 3 by 3 covariance matrix generated from a dataset. + normalize (bool): Whether or not to normalize the resulting KLT matrix. + Default: ``False`` + epsilon (float, optional): A small epsilon value to use for numerical + stability. + Default: ``1e-10`` Returns: *tensor*: A KLT matrix for the specified covariance matrix. @@ -121,15 +123,15 @@ def dataset_klt_matrix( Args: - loader (torch.utils.data.DataLoader): The reference to a PyTorch + loader (torch.utils.data.DataLoader): The reference to a PyTorch dataloader instance. - normalize (bool): Whether or not to normalize the resulting KLT matrix. - Default: False + normalize (bool): Whether or not to normalize the resulting KLT matrix. + Default: ``False`` show_progress (bool, optional): Whether or not to display a tqdm progress bar. - Default: False + Default: ``False`` device (torch.device, optional): The PyTorch device to use for for calculating the cov matrix. 
- Default: torch.device("cpu") + Default: ``torch.device("cpu")`` Returns: *tensor*: A KLT matrix for the specified dataset. diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index 85afc7b32d..acd5f8fe7f 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -23,31 +23,36 @@ def googlenet_places365( The pretrained GoogleNet model was trained using the MIT Places365 Standard dataset. See here for more information: https://arxiv.org/abs/1610.02055 + Example:: + + >>> model = opt.models.googlenet_places365(pretrained=True) + >>> output = model(torch.zeros(1, 3, 224, 224)) + Args: - pretrained (bool, optional): If True, returns a model pre-trained on the MIT - Places365 Standard dataset. - Default: False - progress (bool, optional): If True, displays a progress bar of the download to - stderr - Default: True - model_path (str, optional): Optional path for InceptionV1 model file. - Default: None - replace_relus_with_redirectedrelu (bool, optional): If True, return pretrained - model with Redirected ReLU in place of ReLU layers. - Default: *True* when pretrained is True otherwise *False* - use_linear_modules_only (bool, optional): If True, return pretrained + pretrained (bool, optional): If ``True``, returns a model pre-trained on the + MIT Places365 Standard dataset. + Default: ``False`` + progress (bool, optional): If ``True``, displays a progress bar of the + download to stderr. + Default: ``True`` + model_path (str, optional): Optional path for the InceptionV1 model file. + Default: ``None`` + replace_relus_with_redirectedrelu (bool, optional): If ``True``, return + pretrained model with Redirected ReLU in place of ReLU layers. 
+ Default: *``True``* when pretrained is True otherwise *``False``* + use_linear_modules_only (bool, optional): If ``True``, return pretrained model with all nonlinear layers replaced with linear equivalents. - Default: False - aux_logits (bool, optional): If True, adds two auxiliary branches that can + Default: ``False`` + aux_logits (bool, optional): If ``True``, adds two auxiliary branches that can improve training. - Default: True + Default: ``True`` out_features (int, optional): Number of output features in the model used for - training. Default: 365 when pretrained is True. - Default: 365 + training. + Default: ``365`` transform_input (bool, optional): If True, preprocesses the input according to the method with which it was trained on Places365. - Default: True + Default: ``True`` Returns: **model** (InceptionV1Places365): An InceptionV1 Places365 model instance. @@ -99,19 +104,19 @@ def __init__( out_features (int, optional): Number of output features in the model used for training. - Default: 365 - aux_logits (bool, optional): If True, adds two auxiliary branches that can - improve training. - Default: True - transform_input (bool, optional): If True, preprocesses the input according - to the method with which it was trained on Places365. - Default: True - replace_relus_with_redirectedrelu (bool, optional): If True, return + Default: ``365`` + aux_logits (bool, optional): If ``True``, adds two auxiliary branches that + can improve training. + Default: ``True`` + transform_input (bool, optional): If ``True``, preprocesses the input + according to the method with which it was trained on Places365. + Default: ``True`` + replace_relus_with_redirectedrelu (bool, optional): If ``True``, return pretrained model with Redirected ReLU in place of ReLU layers. - Default: False - use_linear_modules_only (bool, optional): If True, return pretrained model - with all nonlinear layers replaced with linear equivalents. 
- Default: False + Default: ``False`` + use_linear_modules_only (bool, optional): If ``True``, return pretrained + model with all nonlinear layers replaced with linear equivalents. + Default: ``False`` """ super().__init__() self.aux_logits = aux_logits @@ -295,10 +300,10 @@ def __init__( pool_proj (int, optional): activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. - Default: nn.ReLU + Default: ``nn.ReLU`` p_layer (type of nn.Module, optional): The nn.Module class type to use for pooling layers. - Default: nn.MaxPool2d + Default: ``nn.MaxPool2d`` """ super().__init__() self.conv_1x1 = nn.Conv2d( @@ -392,13 +397,13 @@ def __init__( in_channels (int, optional): The number of input channels to use for the auxiliary branch. - Default: 508 + Default: ``508`` out_features (int, optional): The number of output features to use for the auxiliary branch. - Default: 1008 - activ (type of nn.Module, optional): The nn.Module class type to use for - activation layers. - Default: nn.ReLU + Default: ``1008`` + activ (type of nn.Module, optional): The ``nn.Module`` class type to use + for activation layers. + Default: ``nn.ReLU`` """ super().__init__() self.avg_pool = nn.AdaptiveAvgPool2d((4, 4)) From 2b665f2a74c64097dc3e4d8ad6acfc74eeb5e7c0 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 17:51:53 -0600 Subject: [PATCH 055/174] Improve CLIP loss docs for Sphinx --- captum/optim/_core/loss.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 04457aaa30..252f569921 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -865,14 +865,14 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance. channel_index (int, optional): Optionally only target a specific channel. - If set to None, all channels with be used. 
- Default: None + If set to ``None``, all channels will be used. + Default: ``None`` constant (float, optional): Constant value to deduct from the activations. - Default: 0.5 + Default: ``0.5`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -920,16 +920,16 @@ def __init__( channel / feature dimension of the target layer instance. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set - to None, then no activation function will be used. - Default: torch.nn.functional.relu + to ``None``, then no activation function will be used. + Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix product. - Default: True + Default: ``True`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 1 @@ -979,21 +979,22 @@ def __init__( visualizing targets from. This is normally the penultimate layer of the model. layer_target (nn.Module): A layer that we have facet_weights for. This - target layer should be below the ultimate_target layer in the model. + target layer should be below the ``ultimate_target`` layer in the + model. facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept.
These weight values should - come from linear probes trained on layer_target. + come from linear probes trained on ``layer_target``. strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of - floats with a shape of: [start, end] should be used for torch.linspace - to calculate the step values in between. Default is set to None for no - weighting. - Default: None + floats with a shape of: [start, end] should be used for + ``torch.linspace`` to calculate the step values in between. Default is + set to ``None`` for no weighting. + Default: ``None`` batch_index (int, optional): The index of the activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) self.ultimate_target = ultimate_target From 5837745fae82c98a4267b7124cea535825821179 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 17:56:19 -0600 Subject: [PATCH 056/174] Improve loss docs for Sphinx --- captum/optim/_core/loss.py | 240 ++++++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 80 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 4657b23f8c..8a4a1a65b9 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -197,6 +197,10 @@ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: class BaseLoss(Loss): + """ + The base class used for all Loss objectives. + """ + def __init__( self, target: Union[nn.Module, List[nn.Module]] = [], @@ -209,9 +213,9 @@ def __init__( nn.Module. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. 
If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ super().__init__() self._target = target @@ -244,6 +248,82 @@ def batch_index(self) -> Tuple: class CompositeLoss(BaseLoss): + """ + When math operations are performed using one or more loss objectives, this class + is used to store and run those operations. Below we show examples of common + CompositeLoss use cases. + + + Using CompositeLoss with a unary op or with a binary op involving a Loss instance + and a float or integer: + + .. code-block:: python + + def compose_single_loss(loss: opt.loss.Loss) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + return loss(module) + + # Name of new composable loss instance + name = loss.__name__ + # All targets being used in the composable loss instance + target = loss.target + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + + Using CompositeLoss with a binary op using two Loss instances: + + .. 
code-block:: python + + def compose_binary_loss( + loss1: opt.loss.Loss, loss2: opt.loss.Loss + ) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + # Operation using 2 loss instances + return loss1(module) + loss2(module) + + # Name of new composable loss instance + name = "Compose(" + ", ".join([loss1.__name__, loss2.__name__]) + ")" + + # All targets being used in the composable loss instance + target1 = loss1.target if type(loss1.target) is list else [loss1.target] + target2 = loss2.target if type(loss2.target) is list else [loss2.target] + target = target1 + target2 + + # Remove duplicate targets + target = list(dict.fromkeys(target)) + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + + Using CompositeLoss with a list of Loss instances: + + .. code-block:: python + + def compose_multiple_loss(loss: List[opt.loss.Loss]) -> opt.loss.CompositeLoss: + def loss_fn( + module: Dict[nn.Module, Optional[torch.Tensor]] + ) -> torch.Tensor: + loss_tensors = [loss_obj(module) for loss_obj in loss] + # We can use any operation that combines the list of tensors into a + # single tensor + return sum(loss_tensors) + + # Name of new composable loss instance + name = "Compose(" + ", ".join([obj.__name__ for obj in loss]) + ")" + + # All targets being used in the composable loss instance + # targets will either be List[nn.Module] or nn.Module + targets = [loss_obj.target for loss_obj in loss] + # Flatten list of targets + target = [ + o for l in [t if type(t) is list else [t] for t in targets] for o in l + ] + # Remove duplicate targets + target = list(dict.fromkeys(target)) + return opt.loss.CompositeLoss(loss_fn, name=name, target=target) + """ + def __init__( self, loss_fn: Callable, @@ -258,7 +338,7 @@ def __init__( objective(s) & math operations. name (str, optional): The name of all composable operations in the instance. 
- Default: "" + Default: ``""`` target (nn.Module or list of nn.module): A target nn.Module or list of nn.Module. """ @@ -317,9 +397,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -351,9 +431,9 @@ def __init__( channel_index (int): The index of the channel to optimize for. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -397,16 +477,16 @@ def __init__( x (int, optional): The x coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit left of center for even lengths. - Default: None + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If unspecified, defaults to center, or one unit up of center for even heights. - Default: None + Default: ``None`` batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. 
+ Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.channel_index = channel_index @@ -452,9 +532,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -487,9 +567,9 @@ def __init__( instance to optimize the output of. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set - to None, defaults to all activations in the batch. Index ranges should - be in the format of: [start, end]. - Default: None + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -521,9 +601,9 @@ def __init__( constant (float): Constant threshold to deduct from the activations. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -553,14 +633,14 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. 
- Default: 0.0 + Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. - Default: 1e-6 + Default: ``1e-6`` batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to - None, defaults to all activations in the batch. Index ranges should be - in the format of: [start, end]. - Default: None + ``None``, defaults to all activations in the batch. Index ranges should + be in the format of: [start, end]. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.constant = constant @@ -596,9 +676,9 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. + optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -643,13 +723,13 @@ def __init__( target1 (nn.Module): The first layer, transform, or image parameterization instance to optimize the output for. channel_index1 (int, optional): Index of channel in first target to - optimize. Default is set to None for all channels. - Default: None + optimize. Default is set to ``None`` for all channels. + Default: ``None`` target2 (nn.Module): The second layer, transform, or image parameterization instance to optimize the output for. channel_index2 (int, optional): Index of channel in second target to - optimize. Default is set to None for all channels. - Default: None + optimize. Default is set to ``None`` for all channels. + Default: ``None`` """ self.target_one = target1 self.channel_index_one = channel_index1 @@ -712,11 +792,11 @@ def __init__( instance to optimize the output of. 
decay_ratio (float): How much to decay penalty as images move apart in the batch. - Default: 2.0 + Default: ``2.0`` batch_index (list of int, optional): The index range of activations to - optimize. If set to None, defaults to all activations in the batch. + optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. - Default: None + Default: ``None`` """ if batch_index: assert len(batch_index) == 2 @@ -766,11 +846,11 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all - activations in the batch. - Default: None + optimizing a batch of activations. If set to ``None``, defaults to + all activations in the batch. + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) @@ -810,22 +890,22 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` channel_index (int): The index of the channel to optimize for. If set to - None, then all channels will be used. - Default: None + ``None``, then all channels will be used. 
+ Default: ``None`` cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.reshape((1, -1, 1, 1)) @@ -897,24 +977,24 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): A neuron direction vector to use. vec_whitened (torch.Tensor, optional): A whitened neuron direction vector. - If set to None, then no whitened vec will be used. - Default: None + If set to ``None``, then no whitened vec will be used. + Default: ``None`` cossim_pow (float, optional): The desired cosine similarity power to use. x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` eps (float, optional): If cossim_pow is greater than zero, the desired epsilon value to use for cosine similarity calculations. - Default: 1.0e-4 + Default: ``1.0e-4`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. 
- Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) self.vec = vec.unsqueeze(0) if vec.dim() == 1 else vec @@ -974,11 +1054,11 @@ def __init__( instance to optimize the output of. vec (torch.Tensor): Vector representing direction to align to. cossim_pow (float, optional): The desired cosine similarity power to use. - Default: 0.0 + Default: ``0.0`` batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to None, defaults to all + optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. - Default: None + Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) assert vec.dim() == 4 @@ -1030,21 +1110,21 @@ def __init__( instance to optimize the output of. weights (torch.Tensor): Weights to apply to targets. neuron (bool): Whether target is a neuron. - Default: False + Default: ``False`` x (int, optional): The x coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit left of center for even - lengths. - Default: None + set to ``None``, defaults to center, or one unit left of center for + even lengths. + Default: ``None`` y (int, optional): The y coordinate of the neuron to optimize for. If - set to None, defaults to center, or one unit up of center for even - heights. - Default: None + set to ``None``, defaults to center, or one unit up of center for + even heights. + Default: ``None`` wx (int, optional): Length of neurons to apply the weights to, along the - x-axis. Set to None for the full length. - Default: None + x-axis. Set to ``None`` for the full length. + Default: ``None`` wy (int, optional): Length of neurons to apply the weights to, along the - y-axis. Set to None for the full length. - Default: None + y-axis. Set to ``None`` for the full length. 
+ Default: ``None`` """ BaseLoss.__init__(self, target) self.x = x @@ -1100,11 +1180,11 @@ def sum_loss_list( loss_list (list): A list of loss objectives. to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. - Default: torch.mean + Default: ``torch.mean`` Returns: loss_fn (CompositeLoss): A CompositeLoss instance containing all the loss - functions from `loss_list`. + functions from ``loss_list``. """ def loss_fn(module: ModuleOutputMapping) -> torch.Tensor: From d3a2ccadb7e7dd843f4e703d0ee1ae313fc8b756 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 19:49:48 -0600 Subject: [PATCH 057/174] Improve vector function docs for Sphinx --- captum/optim/_utils/image/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 1f2cced14f..54cdae4b0d 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -394,11 +394,11 @@ def _create_new_vector( activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set to None, then no activation function will be used. - Default: torch.nn.functional.relu + Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix product. 
- Default: True + Default: ``True`` Returns x (torch.Tensor): A vector created from the input activations and the From 2a592f0512b49ffb8f67e1ceb9d7be14cabc5e6a Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 27 Jun 2022 19:52:10 -0600 Subject: [PATCH 058/174] Adjust spacing in doc variables --- captum/optim/_utils/image/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 66eee6dc38..8b2c5669b5 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -53,7 +53,7 @@ def dataset_cov_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor*: A covariance matrix for the specified dataset. + *tensor*: A covariance matrix for the specified dataset. """ if show_progress: @@ -99,7 +99,7 @@ def cov_matrix_to_klt( Default: ``1e-10`` Returns: - *tensor*: A KLT matrix for the specified covariance matrix. + *tensor*: A KLT matrix for the specified covariance matrix. """ U, S, V = torch.svd(cov_mtx) @@ -134,7 +134,7 @@ def dataset_klt_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor*: A KLT matrix for the specified dataset. + *tensor*: A KLT matrix for the specified dataset. """ cov_mtx = dataset_cov_matrix(loader, show_progress=show_progress, device=device) From e80b42eae878e0ecbbd7921871e3cc8eae825eb2 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 28 Jun 2022 19:34:57 -0600 Subject: [PATCH 059/174] Fix spacing in docs --- captum/optim/_utils/image/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_utils/image/common.py b/captum/optim/_utils/image/common.py index 54cdae4b0d..9e7553b251 100644 --- a/captum/optim/_utils/image/common.py +++ b/captum/optim/_utils/image/common.py @@ -389,7 +389,7 @@ def _create_new_vector( computing the matrix product of the activations. 
See torch.matmul for See torch.matmul for more details on compatible shapes: https://pytorch.org/docs/stable/generated/torch.matmul.html - By default, vec is expected to share the same size as the channel or + By default, ``vec`` is expected to share the same size as the channel or feature dimension of the activations. activation_fn (Callable, optional): An optional activation function to apply to the activations before computing the matrix product. If set @@ -401,7 +401,7 @@ def _create_new_vector( Default: ``True`` Returns - x (torch.Tensor): A vector created from the input activations and the + x (torch.Tensor): A vector created from the input activations and the stored vector. """ assert x.device == vec.device From 8ceecafdff3e2bf1feefdef08639f397b851fd22 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 28 Jun 2022 19:40:05 -0600 Subject: [PATCH 060/174] Improve dataset docs --- captum/optim/_utils/image/dataset.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 8b2c5669b5..6a36dd2644 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -44,7 +44,7 @@ def dataset_cov_matrix( Args: - loader (torch.utils.data.DataLoader): The reference to a PyTorch + loader (torch.utils.data.DataLoader): The reference to a PyTorch dataloader instance. show_progress (bool, optional): Whether or not to display a tqdm progress bar. Default: ``False`` @@ -53,7 +53,7 @@ def dataset_cov_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor*: A covariance matrix for the specified dataset. + *tensor* (torch.Tensor): A covariance matrix for the specified dataset. """ if show_progress: @@ -99,7 +99,8 @@ def cov_matrix_to_klt( Default: ``1e-10`` Returns: - *tensor*: A KLT matrix for the specified covariance matrix. + *tensor* (torch.Tensor): A KLT matrix for the specified covariance + matrix. 
""" U, S, V = torch.svd(cov_mtx) @@ -134,7 +135,7 @@ def dataset_klt_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor*: A KLT matrix for the specified dataset. + *tensor* (torch.Tensor): A KLT matrix for the specified dataset. """ cov_mtx = dataset_cov_matrix(loader, show_progress=show_progress, device=device) From 0491cca122bf64d0275464c1362aeab8c4bea756 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 29 Jun 2022 08:41:52 -0600 Subject: [PATCH 061/174] Improve Sphinx docs --- captum/optim/models/_common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index d0a1d81208..4b460058f8 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -82,7 +82,14 @@ def replace_layers( Replace all target layers with new layers inside the specified model, possibly with the same initialization variables. + Example:: + + >>> model = opt.models.googlenet(pretrained=True) + >>> # Replace MaxPool2d layers with their AvgPool2d equivalents + >>> opt.models.replace_layers(model, nn.MaxPool2d, nn.AvgPool2d, True) + Args: + model: (nn.Module): A PyTorch model instance. layer1: (Type[nn.Module]): The layer class that you want to transfer initialization variables from. From 86f24bac11f4110aa17408701487d792e9afdca1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 29 Jun 2022 16:29:24 -0600 Subject: [PATCH 062/174] Improve ImageTensor docs --- captum/optim/_param/image/images.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 64400f24ff..7853761573 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -131,7 +131,8 @@ def show( pad_value: float = 0.0, ) -> None: """ - Display an ``ImageTensor`` instance. + Display image(s) in the ``ImageTensor`` instance using + :func:`captum.optim.show`. 
Args: @@ -170,7 +171,8 @@ def export( pad_value: float = 0.0, ) -> None: """ - Save an `ImageTensor` as an image file. + Save image(s) in the `ImageTensor` instance as an image file, using + :func:`captum.optim.save_tensor_as_image`. Args: From 975550e992524eb1ccea749e6ff44d9ad4a7a2c1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 1 Jul 2022 13:14:42 -0600 Subject: [PATCH 063/174] Add 'Feature Visualization' keyword --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 09fe441950..cd930850f8 100755 --- a/setup.py +++ b/setup.py @@ -133,6 +133,7 @@ def get_package_files(root, subdirs): "Model Understanding", "Feature Importance", "Neuron Importance", + "Feature Visualization", "PyTorch", ], classifiers=[ From 7530ae5c24a4e67ffb88448916419a02efbba849 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 1 Jul 2022 18:50:35 -0600 Subject: [PATCH 064/174] Improve ImageTensor & Dataset docs (#552) * Update images.py * Update dataset.py * Update images.py --- captum/optim/_param/image/images.py | 5 +++-- captum/optim/_utils/image/dataset.py | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 7853761573..568b8edecd 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -29,7 +29,7 @@ class ImageTensor(torch.Tensor): >>> image_tensor = opt.images.ImageTensor.load() >>> image_tensor.export(filename="image_tensor.jpg") # Save image(s) - >>> image_tensor.show(figsize=(8, 8)) # Displays image(s) via Matplotlib + >>> image_tensor.show() # Displays image(s) via Matplotlib Example using ``torch.Tensor``:: @@ -124,7 +124,7 @@ def __torch_function__( def show( self, - figsize: Optional[Tuple[int, int]] = None, + figsize: Optional[Tuple[int, int]] = (8, 8), scale: float = 255.0, images_per_row: Optional[int] = None, padding: int = 2, @@ -138,6 +138,7 @@ def show( figsize (Tuple[int, int], optional): 
height & width to use for displaying the ``ImageTensor`` figure. + Default: ``(8, 8)`` scale (float, optional): Value to multiply the ``ImageTensor`` by so that it's value range is [0-255] for display. Default: ``255.0`` diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 6a36dd2644..9d9108f44d 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -23,7 +23,7 @@ def image_cov(x: torch.Tensor) -> torch.Tensor: dimension. Returns: - *tensor* (torch.Tensor): The average color channel covariance matrix for the + tensor (torch.Tensor): The average color channel covariance matrix for the input tensor, with a shape of: [n_channels, n_channels]. """ @@ -53,7 +53,7 @@ def dataset_cov_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor* (torch.Tensor): A covariance matrix for the specified dataset. + tensor (torch.Tensor): A covariance matrix for the specified dataset. """ if show_progress: @@ -91,7 +91,7 @@ def cov_matrix_to_klt( Args: - cov_mtx (tensor): A 3 by 3 covariance matrix generated from a dataset. + cov_mtx (torch.Tensor): A 3 by 3 covariance matrix generated from a dataset. normalize (bool): Whether or not to normalize the resulting KLT matrix. Default: ``False`` epsilon (float, optional): A small epsilon value to use for numerical @@ -99,7 +99,7 @@ def cov_matrix_to_klt( Default: ``1e-10`` Returns: - *tensor* (torch.Tensor): A KLT matrix for the specified covariance + tensor (torch.Tensor): A KLT matrix for the specified covariance matrix. """ @@ -135,7 +135,7 @@ def dataset_klt_matrix( Default: ``torch.device("cpu")`` Returns: - *tensor* (torch.Tensor): A KLT matrix for the specified dataset. + tensor (torch.Tensor): A KLT matrix for the specified dataset.
""" cov_mtx = dataset_cov_matrix(loader, show_progress=show_progress, device=device) From 4a62c0b542a75ebcd3f3a0f70e306a2ee57383a3 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 4 Jul 2022 18:25:34 -0600 Subject: [PATCH 065/174] Improve docs --- captum/optim/models/_image/inception_v1_places365.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index acd5f8fe7f..c5df0b85b0 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -55,7 +55,7 @@ def googlenet_places365( Default: ``True`` Returns: - **model** (InceptionV1Places365): An InceptionV1 Places365 model instance. + model (InceptionV1Places365): An InceptionV1 Places365 model instance. """ if pretrained: From e63cee8f073d9d125a74fe45d0731807f7211183 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 4 Jul 2022 18:40:34 -0600 Subject: [PATCH 066/174] Improve docs --- captum/optim/_param/image/images.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 568b8edecd..5bb8555a17 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -124,7 +124,7 @@ def __torch_function__( def show( self, - figsize: Optional[Tuple[int, int]] = (8, 8), + figsize: Optional[Tuple[int, int]] = None, scale: float = 255.0, images_per_row: Optional[int] = None, padding: int = 2, @@ -138,7 +138,7 @@ def show( figsize (Tuple[int, int], optional): height & width to use for displaying the ``ImageTensor`` figure. - Default: ``(8, 8)`` + Default: ``None`` scale (float, optional): Value to multiply the ``ImageTensor`` by so that it's value range is [0-255] for display. 
Default: ``255.0`` From 06db64f71cb6473ed9fca91fe041026b1dfc76f8 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 5 Jul 2022 09:07:34 -0600 Subject: [PATCH 067/174] Improve dataset docs --- captum/optim/_utils/image/dataset.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 9d9108f44d..5319e4b9a6 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -42,6 +42,15 @@ def dataset_cov_matrix( """ Calculate the covariance matrix for an image dataset. + Example:: + + >>> # Load image dataset + >>> dataset = torchvision.datasets.ImageFolder("") + >>> dataset_loader = torch.utils.data.DataLoader(dataset) + >>> # Calculate dataset COV matrix + >>> cov_mtx = opt.dataset.dataset_cov_matrix(dataset_loader, True) + >>> print(cov_mtx) + Args: loader (torch.utils.data.DataLoader): The reference to a PyTorch @@ -117,10 +126,19 @@ def dataset_klt_matrix( device: torch.device = torch.device("cpu"), ) -> torch.Tensor: """ - Calculate the color correlation matrix, also known as - a Karhunen-Loève transform (KLT) matrix, for a dataset. - The color correlation matrix can then used in color decorrelation - transforms for models trained on the dataset. + Calculate the color correlation matrix, also known as a Karhunen-Loève transform + (KLT) matrix, for a dataset. The color correlation matrix can then be used in color + decorrelation & recorrelation transforms like + :class:`captum.optim.transforms.ToRGB` for models trained on the dataset.
+ + Example:: + + >>> # Load image dataset + >>> dataset = torchvision.datasets.ImageFolder("") + >>> dataset_loader = torch.utils.data.DataLoader(dataset) + >>> # Calculate dataset KLT matrix + >>> klt_mtx = opt.dataset.dataset_klt_matrix(dataset_loader, True, True) + >>> print(klt_mtx) Args: From b84980a8a7c4722f29b9693234bc255150da224a Mon Sep 17 00:00:00 2001 From: Steffen Maeland Date: Tue, 5 Jul 2022 16:13:10 -0700 Subject: [PATCH 068/174] Add time series visualization function (#980) Summary: Add a convenience function to plot time series data with attributions overlaid (`captum.attr.visualization.visualize_timeseries_attr`). This addresses https://github.com/pytorch/captum/issues/958 . Comes with three styles, shown here for some dummy data: 1) Plot each channel in a separate panel, with separate heatmaps overlaid ![overlaid_individual](https://user-images.githubusercontent.com/30171842/174852816-f3c7d67f-d03f-4d04-91b4-6766052a640d.png) 2) Plot all channels in a single panel, with average heatmap overlaid ![overlaid_combined](https://user-images.githubusercontent.com/30171842/174852821-1ab089b2-9e30-4233-9726-dd3e3d9f03f5.png) 3) Plot each channel in a separate panel and color the graphs by attribution values at each time step ![colored_graph](https://user-images.githubusercontent.com/30171842/174852820-f0be8148-d432-43f3-a301-e783b98dece0.png) The function accepts matplotlib keyword arguments for additional styling. 
Pull Request resolved: https://github.com/pytorch/captum/pull/980 Reviewed By: vivekmig Differential Revision: D37495470 Pulled By: i-jones fbshipit-source-id: d218dc035d7158af39480a4df63a0bb9500f495c --- captum/attr/_utils/visualization.py | 330 +++++++++++++++++++++++++++- 1 file changed, 324 insertions(+), 6 deletions(-) diff --git a/captum/attr/_utils/visualization.py b/captum/attr/_utils/visualization.py index 2db9026872..0cfada9b7b 100644 --- a/captum/attr/_utils/visualization.py +++ b/captum/attr/_utils/visualization.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 import warnings from enum import Enum -from typing import Any, Iterable, List, Tuple, Union +from typing import Any, Iterable, List, Optional, Tuple, Union import numpy as np -from matplotlib import pyplot as plt +from matplotlib import cm, colors, pyplot as plt +from matplotlib.collections import LineCollection from matplotlib.colors import LinearSegmentedColormap from matplotlib.figure import Figure from matplotlib.pyplot import axis, figure @@ -27,6 +28,12 @@ class ImageVisualizationMethod(Enum): alpha_scaling = 5 +class TimeseriesVisualizationMethod(Enum): + overlay_individual = 1 + overlay_combined = 2 + colored_graph = 3 + + class VisualizeSign(Enum): positive = 1 absolute_value = 2 @@ -61,10 +68,16 @@ def _cumulative_sum_threshold(values: ndarray, percentile: Union[int, float]): return sorted_vals[threshold_id] -def _normalize_image_attr( - attr: ndarray, sign: str, outlier_perc: Union[int, float] = 2 +def _normalize_attr( + attr: ndarray, + sign: str, + outlier_perc: Union[int, float] = 2, + reduction_axis: Optional[int] = None, ): - attr_combined = np.sum(attr, axis=2) + attr_combined = attr + if reduction_axis is not None: + attr_combined = np.sum(attr, axis=reduction_axis) + # Choose appropriate signed values and rescale, removing given outlier percentage. 
if VisualizeSign[sign] == VisualizeSign.all: threshold = _cumulative_sum_threshold(np.abs(attr_combined), 100 - outlier_perc) @@ -241,7 +254,7 @@ def visualize_image_attr( plt_axis.imshow(original_image) else: # Choose appropriate signed attributions and normalize. - norm_attr = _normalize_image_attr(attr, sign, outlier_perc) + norm_attr = _normalize_attr(attr, sign, outlier_perc, reduction_axis=2) # Set default colormap and bounds based on sign. if VisualizeSign[sign] == VisualizeSign.all: @@ -422,6 +435,311 @@ def visualize_image_attr_multiple( return plt_fig, plt_axis +def visualize_timeseries_attr( + attr: ndarray, + data: ndarray, + x_values: Optional[ndarray] = None, + method: str = "individual_channels", + sign: str = "absolute_value", + channel_labels: Optional[List[str]] = None, + channels_last: bool = True, + plt_fig_axis: Union[None, Tuple[figure, axis]] = None, + outlier_perc: Union[int, float] = 2, + cmap: Union[None, str] = None, + alpha_overlay: float = 0.7, + show_colorbar: bool = False, + title: Union[None, str] = None, + fig_size: Tuple[int, int] = (6, 6), + use_pyplot: bool = True, + **pyplot_kwargs, +): + r""" + Visualizes attribution for a given timeseries data by normalizing + attribution values of the desired sign (positive, negative, absolute value, + or all) and displaying them using the desired mode in a matplotlib figure. + + Args: + + attr (numpy.array): Numpy array corresponding to attributions to be + visualized. Shape must be in the form (N, C) with channels + as last dimension, unless `channels_last` is set to False. + Shape must also match that of the timeseries data. + data (numpy.array): Numpy array corresponding to the original, + equidistant timeseries data. Shape must be in the form + (N, C) with channels as last dimension, unless + `channels_last` is set to False. + x_values (numpy.array, optional): Numpy array corresponding to the + points on the x-axis. Shape must be in the form (N, ).
If + not provided, integers from 0 to N-1 are used. + Default: None + method (string, optional): Chosen method for visualizing attributions + overlaid onto data. Supported options are: + + 1. `overlay_individual` - Plot each channel individually in + a separate panel, and overlay the attributions for each + channel as a heat map. The `alpha_overlay` parameter + controls the alpha of the heat map. + + 2. `overlay_combined` - Plot all channels in the same panel, + and overlay the average attributions as a heat map. + + 3. `colored_graph` - Plot each channel in a separate panel, + and color the graphs according to the attribution + values. Works best with color maps that do not contain + white or very bright colors. + Default: `overlay_individual` + sign (string, optional): Chosen sign of attributions to visualize. + Supported options are: + + 1. `positive` - Displays only positive attributions. + + 2. `absolute_value` - Displays absolute value of + attributions. + + 3. `negative` - Displays only negative attributions. + + 4. `all` - Displays both positive and negative attribution + values. + Default: `absolute_value` + channel_labels (list of strings, optional): List of labels + corresponding to each channel in data. + Default: None + channels_last (bool, optional): If True, data is expected to have + channels as the last dimension, i.e. (N, C). If False, data + is expected to have channels first, i.e. (C, N). + Default: True + plt_fig_axis (tuple, optional): Tuple of matplotlib.pyplot.figure and axis + on which to visualize. If None is provided, then a new figure + and axis are created. + Default: None + outlier_perc (float or int, optional): Top attribution values which + correspond to a total of outlier_perc percentage of the + total attribution are set to 1 and scaling is performed + using the minimum of these values. For sign=`all`, outliers + and scale value are computed using absolute value of + attributions.
+ Default: 2 + cmap (string, optional): String corresponding to desired colormap for + heatmap visualization. This defaults to "Reds" for negative + sign, "Blues" for absolute value, "Greens" for positive sign, + and a spectrum from red to green for all. Note that this + argument is only used for visualizations displaying heatmaps. + Default: None + alpha_overlay (float, optional): Alpha to set for heatmap when using + the `overlay_individual` or `overlay_combined` methods, which + overlay the heat map on the plotted timeseries data. + Default: 0.7 + show_colorbar (boolean): Displays colorbar for heat map below + the visualization. + title (string, optional): Title string for plot. If None, no title is + set. + Default: None + fig_size (tuple, optional): Size of figure created. + Default: (6,6) + use_pyplot (boolean): If true, uses pyplot to create and show + figure and displays the figure after creating. If False, + uses Matplotlib object oriented API and simply returns a + figure object without showing. + Default: True. + pyplot_kwargs: Keyword arguments forwarded to plt.plot, for example + `linewidth=3`, `color='black'`, etc + + Returns: + 2-element tuple of **figure**, **axis**: + - **figure** (*matplotlib.pyplot.figure*): + Figure object on which visualization + is created. If plt_fig_axis argument is given, this is the + same figure provided. + - **axis** (*matplotlib.pyplot.axis*): + Axis object on which visualization + is created. If plt_fig_axis argument is given, this is the + same axis provided.
+ + Examples:: + + >>> # Classifier takes input of shape (batch, length, channels) + >>> model = Classifier() + >>> dl = DeepLift(model) + >>> attribution = dl.attribute(data, target=0) + >>> # Pick the first sample and plot each channel in data in a separate + >>> # panel, with attributions overlaid + >>> visualize_timeseries_attr(attribution[0], data[0], "overlay_individual") + """ + + # Check input dimensions + assert len(attr.shape) == 2, "Expected attr of shape (N, C), got {}".format( + attr.shape + ) + assert len(data.shape) == 2, "Expected data of shape (N, C), got {}".format( + attr.shape + ) + + # Convert to channels-first + if channels_last: + attr = np.transpose(attr) + data = np.transpose(data) + + num_channels = attr.shape[0] + timeseries_length = attr.shape[1] + + if num_channels > timeseries_length: + warnings.warn( + "Number of channels ({}) greater than time series length ({}), " + "please verify input format".format(num_channels, timeseries_length) + ) + + num_subplots = num_channels + if ( + TimeseriesVisualizationMethod[method] + == TimeseriesVisualizationMethod.overlay_combined + ): + num_subplots = 1 + attr = np.sum(attr, axis=0) # Merge attributions across channels + + if x_values is not None: + assert ( + x_values.shape[0] == timeseries_length + ), "x_values must have same length as data" + else: + x_values = np.arange(timeseries_length) + + # Create plot if figure, axis not provided + if plt_fig_axis is not None: + plt_fig, plt_axis = plt_fig_axis + else: + if use_pyplot: + plt_fig, plt_axis = plt.subplots( + figsize=fig_size, nrows=num_subplots, sharex=True + ) + else: + plt_fig = Figure(figsize=fig_size) + plt_axis = plt_fig.subplots(nrows=num_subplots, sharex=True) + + if not isinstance(plt_axis, ndarray): + plt_axis = np.array([plt_axis]) + + norm_attr = _normalize_attr(attr, sign, outlier_perc, reduction_axis=None) + + # Set default colormap and bounds based on sign. 
+ if VisualizeSign[sign] == VisualizeSign.all: + default_cmap = LinearSegmentedColormap.from_list( + "RdWhGn", ["red", "white", "green"] + ) + vmin, vmax = -1, 1 + elif VisualizeSign[sign] == VisualizeSign.positive: + default_cmap = "Greens" + vmin, vmax = 0, 1 + elif VisualizeSign[sign] == VisualizeSign.negative: + default_cmap = "Reds" + vmin, vmax = 0, 1 + elif VisualizeSign[sign] == VisualizeSign.absolute_value: + default_cmap = "Blues" + vmin, vmax = 0, 1 + else: + raise AssertionError("Visualize Sign type is not valid.") + cmap = cmap if cmap is not None else default_cmap + cmap = cm.get_cmap(cmap) + cm_norm = colors.Normalize(vmin, vmax) + + def _plot_attrs_as_axvspan(attr_vals, x_vals, ax): + + half_col_width = (x_values[1] - x_values[0]) / 2.0 + for icol, col_center in enumerate(x_vals): + left = col_center - half_col_width + right = col_center + half_col_width + ax.axvspan( + xmin=left, + xmax=right, + facecolor=(cmap(cm_norm(attr_vals[icol]))), + edgecolor=None, + alpha=alpha_overlay, + ) + + if ( + TimeseriesVisualizationMethod[method] + == TimeseriesVisualizationMethod.overlay_individual + ): + + for chan in range(num_channels): + + plt_axis[chan].plot(x_values, data[chan, :], **pyplot_kwargs) + if channel_labels is not None: + plt_axis[chan].set_ylabel(channel_labels[chan]) + + _plot_attrs_as_axvspan(norm_attr[chan], x_values, plt_axis[chan]) + + plt.subplots_adjust(hspace=0) + + elif ( + TimeseriesVisualizationMethod[method] + == TimeseriesVisualizationMethod.overlay_combined + ): + + # Dark colors are better in this case + cycler = plt.cycler("color", cm.Dark2.colors) + plt_axis[0].set_prop_cycle(cycler) + + for chan in range(num_channels): + if channel_labels is not None: + label = channel_labels[chan] + else: + label = None + plt_axis[0].plot(x_values, data[chan, :], label=label, **pyplot_kwargs) + + _plot_attrs_as_axvspan(norm_attr, x_values, plt_axis[0]) + + plt_axis[0].legend(loc="best") + + elif ( + TimeseriesVisualizationMethod[method] + == 
TimeseriesVisualizationMethod.colored_graph + ): + + for chan in range(num_channels): + + points = np.array([x_values, data[chan, :]]).T.reshape(-1, 1, 2) + segments = np.concatenate([points[:-1], points[1:]], axis=1) + + lc = LineCollection(segments, cmap=cmap, norm=cm_norm, **pyplot_kwargs) + lc.set_array(norm_attr[chan, :]) + plt_axis[chan].add_collection(lc) + plt_axis[chan].set_ylim( + 1.2 * np.min(data[chan, :]), 1.2 * np.max(data[chan, :]) + ) + if channel_labels is not None: + plt_axis[chan].set_ylabel(channel_labels[chan]) + + plt.subplots_adjust(hspace=0) + + else: + raise AssertionError("Invalid visualization method: {}".format(method)) + + plt.xlim([x_values[0], x_values[-1]]) + + if show_colorbar: + axis_separator = make_axes_locatable(plt_axis[-1]) + colorbar_axis = axis_separator.append_axes("bottom", size="5%", pad=0.4) + colorbar_alpha = alpha_overlay + if ( + TimeseriesVisualizationMethod[method] + == TimeseriesVisualizationMethod.colored_graph + ): + colorbar_alpha = 1.0 + plt_fig.colorbar( + cm.ScalarMappable(cm_norm, cmap), + orientation="horizontal", + cax=colorbar_axis, + alpha=colorbar_alpha, + ) + if title: + plt_axis[0].set_title(title) + + if use_pyplot: + plt.show() + + return plt_fig, plt_axis + + # These visualization methods are for text and are partially copied from # experiments conducted by Davide Testuggine at Facebook. 
From 10d2379c105c125ddaafbfe635646219f4457f61 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 6 Jul 2022 14:42:59 -0600 Subject: [PATCH 069/174] Add missing Places365 InceptionModule docs --- .../models/_image/inception_v1_places365.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index c5df0b85b0..b92bbb6e6e 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -290,14 +290,20 @@ def __init__( """ Args: - in_channels (int, optional): The number of input channels to use for the - inception module. - c1x1 (int, optional): - c3x3reduce (int, optional): - c3x3 (int, optional): - c5x5reduce (int, optional): - c5x5 (int, optional): - pool_proj (int, optional): + in_channels (int): The number of input channels to use for the first + layers of the inception module branches. + c1x1 (int): The number of output channels to use for the first layer in + the c1x1 branch. + c3x3reduce (int): The number of output channels to use for the first layer + in the c3x3 branch. + c3x3 (int): The number of output channels to use for the second layer in + the c3x3 branch. + c5x5reduce (int): The number of output channels to use for the first layer + in the c5x5 branch. + c5x5 (int): The number of output channels to use for the second layer in + the c5x5 branch. + pool_proj (int): The number of output channels to use for the second layer + in the pool branch. activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. 
Default: ``nn.ReLU`` From b376466519d2f70b7de04e2087a1d875781d8816 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 9 Jul 2022 13:47:59 -0600 Subject: [PATCH 070/174] Improve Optimization docs --- captum/optim/_core/optimization.py | 33 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 4072b0f987..424d643282 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -71,13 +71,13 @@ def __init__( r""" Args: - model (nn.Module, optional): The reference to PyTorch model instance. - input_param (nn.Module, optional): A module that generates an input, - consumed by the model. - transform (nn.Module, optional): A module that transforms or preprocesses - the input before being passed to the model. - loss_function (callable): The loss function to minimize during optimization - optimization. + model (nn.Module, optional): The reference to PyTorch model instance. + input_param (nn.Module, optional): A module that generates an input, + consumed by the model. + transform (nn.Module, optional): A module that transforms or preprocesses + the input before being passed to the model. + loss_function (callable): The loss function to minimize during + optimization. """ self.model = model or nn.Identity() # Grab targets from loss_function @@ -100,9 +100,9 @@ def loss(self) -> torch.Tensor: r"""Compute loss value for current iteration. Returns: - *tensor* representing **loss**: - - **loss** (*tensor*): - Size of the tensor corresponds to the targets passed. + tensor representing **loss**: + - **loss** (torch.Tensor): Size of the tensor corresponds to the targets + passed. 
""" input_t = self.input_param() @@ -153,12 +153,13 @@ def optimize( Args: - stop_criteria (StopCriteria, optional): A function that is called + stop_criteria (StopCriteria, optional): A function that is called every iteration and returns a bool that determines whether to stop the optimization. Default: ``n_steps(512)`` - optimizer (Optimizer, optional): An ``torch.optim.Optimizer`` used to - optimize the input based on the loss function. + optimizer (torch.optim.Optimizer, optional): A ``torch.optim.Optimizer`` + instance to use for optimizing the input based on the loss function. + Default: ``torch.optim.Adam`` loss_summarize_fn (Callable, optional): The function to use for summarizing tensor outputs from loss functions. Default: ``default_loss_summarize`` @@ -200,12 +201,12 @@ def n_steps(n: int, show_progress: bool = True) -> StopCriteria: Args: - n (int): Number of steps to run optimization. - show_progress (bool, optional): Whether or not to show progress bar. + n (int): Number of steps to run optimization. + show_progress (bool, optional): Whether or not to show progress bar. Default: ``True`` Returns: - *StopCriteria* (callable): A stop criteria function. + StopCriteria (callable): A stop criteria function. 
""" if show_progress: From 1821a2da25ce6ebb9f4d16a4d99174db6a3f6e5b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 10 Jul 2022 09:56:37 -0600 Subject: [PATCH 071/174] http -> https --- captum/optim/models/_image/inception_v1_places365.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index b92bbb6e6e..81bb7b98c1 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -18,7 +18,7 @@ def googlenet_places365( **kwargs: Any, ) -> "InceptionV1Places365": r"""GoogLeNet (also known as Inception v1 & Inception 5h) model architecture from - `"Going Deeper with Convolutions" `_. + `"Going Deeper with Convolutions" `_. The pretrained GoogleNet model was trained using the MIT Places365 Standard dataset. See here for more information: https://arxiv.org/abs/1610.02055 From adaf3674883910b10a3ae53c29cbf6f9dd147261 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 10 Jul 2022 13:04:49 -0600 Subject: [PATCH 072/174] Improve InputOptimization docs --- captum/optim/_core/optimization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 424d643282..7d26946e87 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -33,8 +33,9 @@ class InputOptimization(Objective, Parameterized): This is similar to gradient-based methods for adversarial examples, such as FGSM. The code for this was based on the implementation by the authors of Lucid. 
For more details, see the following: - https://github.com/tensorflow/lucid - https://distill.pub/2017/feature-visualization/ + + * https://github.com/tensorflow/lucid + * https://distill.pub/2017/feature-visualization/ Example:: From 509accd805bd864c29abf92546116e010ffbe89a Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 14 Jul 2022 11:38:16 -0600 Subject: [PATCH 073/174] Improve loss docs --- captum/optim/_core/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 252f569921..8c9011e8a2 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -924,7 +924,7 @@ def __init__( Default: ``torch.nn.functional.relu`` move_channel_dim_to_final_dim (bool, optional): Whether or not to move the channel dimension to the last dimension before computing the matrix - product. + product. Set to ``False`` if the using the channels last format. Default: ``True`` batch_index (int, optional): The index of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all @@ -988,8 +988,8 @@ def __init__( to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for - ``torch.linspace`` to calculate the step values in between. Default is - set to ``None`` for no weighting. + :func:`torch.linspace` to calculate the step values in between. Default + is set to ``None`` for no weighting. Default: ``None`` batch_index (int, optional): The index of the activations to optimize if optimizing a batch of activations. 
If set to ``None``, defaults to all From 407f76903ea0ca1ced8161e7359d1a18c11ecedf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 15 Jul 2022 10:46:17 -0600 Subject: [PATCH 074/174] Improve DeepDream docs --- captum/optim/_core/loss.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 8a4a1a65b9..33c840e037 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -515,9 +515,15 @@ class DeepDream(BaseLoss): Maximize 'interestingness' at the target layer. Mordvintsev et al., 2015. https://github.com/google/deepdream + This loss returns the squared layer activations. When combined with a negative mean loss summarization, this loss will create hallucinogenic visuals commonly referred to as 'Deep Dream'. + + DeepDream tries to increase the values of neurons proportional to the amount + they are presently active. This is equivalent to maximizing the sum of the + squares. If you remove the square, you'd be doing a direciton visualization + of: ``[1,1,1,....]``. """ def __init__( From 6f10b76c5e15639969c339f8dcd2f348595cd6e3 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 15 Jul 2022 11:50:33 -0600 Subject: [PATCH 075/174] Improve doc grammar --- captum/optim/_core/loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 33c840e037..5f9e6cf62c 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -522,8 +522,8 @@ class DeepDream(BaseLoss): DeepDream tries to increase the values of neurons proportional to the amount they are presently active. This is equivalent to maximizing the sum of the - squares. If you remove the square, you'd be doing a direciton visualization - of: ``[1,1,1,....]``. + squares. If you remove the square, you'd be visualizing a direction of: + ``[1,1,1,....]`` (which is same as :class:`.LayerActivation`). 
""" def __init__( From eb5a961481d28720bc84f39b206983bd1d385895 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 15 Jul 2022 14:12:38 -0600 Subject: [PATCH 076/174] Fix nn.Module type hints --- captum/optim/_core/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 5f9e6cf62c..1c4eabc285 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -209,7 +209,7 @@ def __init__( """ Args: - target (nn.Module or list of nn.module): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to @@ -339,7 +339,7 @@ def __init__( name (str, optional): The name of all composable operations in the instance. Default: ``""`` - target (nn.Module or list of nn.module): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. """ super().__init__(target) @@ -1227,7 +1227,7 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: """ Helper function to summarize tensor outputs from loss objectives. - default_loss_summarize applies `mean` to the loss tensor + default_loss_summarize applies :func:`torch.mean` to the loss tensor and negates it so that optimizing it maximizes the activations we are interested in. 
From a66e7f5179a575f928523f8b854c7b54cb2e3874 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 16 Jul 2022 09:15:30 -0600 Subject: [PATCH 077/174] Fix InputOptimization docs --- captum/optim/_core/optimization.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 7d26946e87..0bdfba8b6e 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -53,13 +53,14 @@ class InputOptimization(Objective, Parameterized): :ivar model: initial value (nn.Module): The given model instance given when initializing ``InputOptimization``. - :ivar input_param: initial value (ImageParameterization): The given image + :ivar input_param: initial value (InputParameterization): The given input parameterization instance given when initializing ``InputOptimization``. :ivar loss_function: initial value (Loss): The given composable loss instance given when initializing ``InputOptimization``. :ivar transform: initial value (nn.Module): The given transform instance given when initializing ``InputOptimization``. If it was set to ``None`` during - initialization, then an instance of ``torch.nn.Identity`` will be returned. + initialization, then an instance of :class:`torch.nn.Identity` will be + returned. """ def __init__( @@ -73,12 +74,12 @@ def __init__( Args: model (nn.Module, optional): The reference to PyTorch model instance. + loss_function (callable): The loss function to minimize during + optimization. input_param (nn.Module, optional): A module that generates an input, consumed by the model. transform (nn.Module, optional): A module that transforms or preprocesses the input before being passed to the model. - loss_function (callable): The loss function to minimize during - optimization. 
""" self.model = model or nn.Identity() # Grab targets from loss_function @@ -139,7 +140,7 @@ def parameters(self) -> Iterable[nn.Parameter]: """ Returns: parameters (iterable of nn.Parameter): An iterable of parameters in the - image parameterization. + input parameterization. """ return self.input_param.parameters() From f7812657a06810f838e4b2aa4ee55bfa942dfe09 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 16 Jul 2022 10:45:12 -0600 Subject: [PATCH 078/174] Fix loss doc type formating --- captum/optim/_core/loss.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1c4eabc285..b3a100b673 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -209,9 +209,9 @@ def __init__( """ Args: - target (nn.Module or list of nn.Module): A target nn.Module or list of + target (nn.Module or List[nn.Module]): A target nn.Module or list of nn.Module. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -232,7 +232,7 @@ def __init__( def target(self) -> Union[nn.Module, List[nn.Module]]: """ Returns: - target (nn.Module or list of nn.Module): A target nn.Module or list of + target (nn.Module or List[nn.Module]): A target nn.Module or list of nn.Module. """ return self._target @@ -241,7 +241,7 @@ def target(self) -> Union[nn.Module, List[nn.Module]]: def batch_index(self) -> Tuple: """ Returns: - batch_index (tuple of int): A tuple of batch indices with a format + batch_index (Tuple[int]): A tuple of batch indices with a format of: (start, end). 
""" return self._batch_index @@ -339,7 +339,7 @@ def __init__( name (str, optional): The name of all composable operations in the instance. Default: ``""`` - target (nn.Module or list of nn.Module): A target nn.Module or list of + target (nn.Module or List[nn.Module]): A target nn.Module or list of nn.Module. """ super().__init__(target) @@ -395,7 +395,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -429,7 +429,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. channel_index (int): The index of the channel to optimize for. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -482,7 +482,7 @@ def __init__( unspecified, defaults to center, or one unit up of center for even heights. Default: ``None`` - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -536,7 +536,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. 
- batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -571,7 +571,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -605,7 +605,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -642,7 +642,7 @@ def __init__( Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. Default: ``1e-6`` - batch_index (int or list of int, optional): The index or index range of + batch_index (int or List[int], optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -681,11 +681,13 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. 
- batch_index (list of int, optional): The index range of activations to + batch_index (List[int], optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` """ + if batch_index: + assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: @@ -799,7 +801,7 @@ def __init__( decay_ratio (float): How much to decay penalty as images move apart in the batch. Default: ``2.0`` - batch_index (list of int, optional): The index range of activations to + batch_index (List[int], optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` From 44260036a1649ca86f1eff8f1afe9f12f2764ccb Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 16 Jul 2022 10:52:42 -0600 Subject: [PATCH 079/174] Fix clip objective doc type formatting --- captum/optim/_core/loss.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 8c9011e8a2..5f10ddafcb 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -857,7 +857,7 @@ def __init__( target: torch.nn.Module, channel_index: Optional[int] = None, constant: float = 0.5, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: """ Args: @@ -869,9 +869,10 @@ def __init__( Default: ``None`` constant (float, optional): Constant value to deduct from the activations. Default: ``0.5`` - batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. 
+ batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -910,7 +911,7 @@ def __init__( vec: torch.Tensor, activation_fn: Optional[Callable] = torch.nn.functional.relu, move_channel_dim_to_final_dim: bool = True, - batch_index: Optional[int] = None, + batch_index: Optional[Union[int, List[int]]] = None, ) -> None: """ Args: @@ -926,9 +927,10 @@ def __init__( channel dimension to the last dimension before computing the matrix product. Set to ``False`` if the using the channels last format. Default: ``True`` - batch_index (int, optional): The index of activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. + batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, target, batch_index) @@ -984,16 +986,17 @@ def __init__( facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should come from linear probes trained on ``layer_target``. - strength (float, list of float, optional): A single float or list of floats + strength (float, List[float], optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for :func:`torch.linspace` to calculate the step values in between. Default is set to ``None`` for no weighting. 
Default: ``None`` - batch_index (int, optional): The index of the activations to optimize if - optimizing a batch of activations. If set to ``None``, defaults to all - activations in the batch. + batch_index (int or List[int], optional): The index or index range of + activations to optimize if optimizing a batch of activations. If set + to ``None``, defaults to all activations in the batch. Index ranges + should be in the format of: [start, end]. Default: ``None`` """ BaseLoss.__init__(self, [ultimate_target, layer_target], batch_index) From 936bc84f2e3fae8f330a77b1d159b0e57900518a Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sat, 16 Jul 2022 14:12:02 -0600 Subject: [PATCH 080/174] Update _common.py --- captum/optim/models/_common.py | 52 ++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 4b460058f8..6ced882ce8 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -16,6 +16,9 @@ def get_model_layers(model: nn.Module) -> List[str]: Args: model (nn.Module): A PyTorch model or module instance to collect layers from. + + Returns: + model_layers (List[str]): A list of hookable layers in the model. """ layers = [] @@ -68,6 +71,14 @@ class RedirectedReluLayer(nn.Module): @torch.jit.ignore def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + + x (torch.Tensor): A tensor to pass through RedirectedReLU. + + Returns: + x (torch.Tensor): The output of RedirectedReLU. + """ return RedirectedReLU.apply(input) @@ -218,7 +229,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: """ Args: - x (torch.tensor): The input tensor to apply 2D convolution to. + x (torch.Tensor): The input tensor to apply 2D convolution to. Returns x (torch.Tensor): The input tensor after the 2D convolution was applied. @@ -254,7 +265,7 @@ def collect_activations( Args: model (nn.Module): A PyTorch model instance. 
- targets (nn.Module or list of nn.Module): One or more layer targets for the + targets (nn.Module or List[nn.Module]): One or more layer targets for the given model. model_input (torch.Tensor or tuple of torch.Tensor, optional): Optionally provide an input tensor to use when collecting the target activations. @@ -278,9 +289,9 @@ class SkipLayer(torch.nn.Module): during the forward pass. Use cases include removing nonlinear activation layers like ReLU for circuits research. - This layer works almost exactly the same way that ``nn.Indentiy`` does, except it - also ignores any additional arguments passed to the forward function. Any layer - replaced by ``SkipLayer`` must have the same input and output shapes. + This layer works almost exactly the same way that nn.Indentiy does, except it also + ignores any additional arguments passed to the forward function. Any layer replaced + by SkipLayer must have the same input and output shapes. See nn.Identity for more details: https://pytorch.org/docs/stable/generated/torch.nn.Identity.html @@ -290,24 +301,23 @@ def __init__(self, *args, **kwargs) -> None: """ Args: - args (Any): Any argument. Arguments will be safely ignored. - kwargs (Any) Any keyword argument. Arguments will be safely ignored. + args (Any, optional): Any argument. Arguments will be safely ignored. + kwargs (Any, optional) Any keyword argument. Arguments will be safely + ignored. """ super().__init__() - def forward( - self, x: Union[torch.Tensor, Tuple[torch.Tensor]], *args, **kwargs - ) -> Union[torch.Tensor, Tuple[torch.Tensor]]: + def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: """ Args: - x (torch.Tensor or tuple of torch.Tensor): The input tensor or tensors. - args (Any): Any argument. Arguments will be safely ignored. - kwargs (Any) Any keyword argument. Arguments will be safely ignored. + x (torch.Tensor): The input tensor. + args (Any, optional): Any argument. Arguments will be safely ignored. 
+ kwargs (Any, optional) Any keyword argument. Arguments will be safely + ignored. Returns: - x (torch.Tensor or tuple of torch.Tensor): The unmodified input tensor or - tensors. + x (torch.Tensor): The unmodified input tensor. """ return x @@ -316,17 +326,15 @@ def skip_layers( model: nn.Module, layers: Union[List[Type[nn.Module]], Type[nn.Module]] ) -> None: """ - This function is a wrapper function for - replace_layers and replaces the target layer - with layers that do nothing. - This is useful for removing the nonlinear ReLU - layers when creating expanded weights. + This function is a wrapper function for :func:`.replace_layers` and replaces the + target layer with layers that do nothing. This is useful for removing the nonlinear + ReLU layers when creating expanded weights. Args: model (nn.Module): A PyTorch model instance. - layers (nn.Module or list of nn.Module): The layer - class type to replace in the model. + layers (nn.Module or List[nn.Module]): The layer class type to replace in the + model. 
""" if not hasattr(layers, "__iter__"): layers = cast(Type[nn.Module], layers) From 42b18ca47f708cc553b8c047e9ebf180c7103ca5 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 17 Jul 2022 08:54:40 -0600 Subject: [PATCH 081/174] Add more assert checks --- captum/optim/_core/loss.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index b3a100b673..1d6f26c5d0 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -687,6 +687,7 @@ def __init__( Default: ``None`` """ if batch_index: + assert isinstance(batch_index, (list, tuple)) assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) @@ -807,6 +808,7 @@ def __init__( Default: ``None`` """ if batch_index: + assert isinstance(batch_index, (list, tuple)) assert len(batch_index) == 2 BaseLoss.__init__(self, target, batch_index) self.decay_ratio = decay_ratio From 96e2f8d016d619637c07b9c3c4093391d77890de Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 17 Jul 2022 12:17:25 -0600 Subject: [PATCH 082/174] Add aliases to InputOptimization and ImageTensor docs --- captum/optim/_core/optimization.py | 2 ++ captum/optim/_param/image/images.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 0bdfba8b6e..5636b63dbf 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -37,6 +37,8 @@ class InputOptimization(Objective, Parameterized): * https://github.com/tensorflow/lucid * https://distill.pub/2017/feature-visualization/ + Alias: ``captum.optim.InputOptimization`` + Example:: >>> model = opt.models.googlenet(pretrained=True) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 5bb8555a17..ee50396572 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -25,6 +25,8 @@ class ImageTensor(torch.Tensor): A subclass of torch.Tensor that 
provides functions for easy loading, saving, and displaying image tensors. + Alias: ``captum.optim.ImageTensor`` + Example using file path or URL:: >>> image_tensor = opt.images.ImageTensor.load() From f31b8ca2dbbb28e47b4d23532674f76f726b7cf4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 17 Jul 2022 14:27:46 -0600 Subject: [PATCH 083/174] Improve MaxPool2dRelaxed docs --- captum/optim/models/_common.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 6ced882ce8..3032bc4cf6 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -289,9 +289,9 @@ class SkipLayer(torch.nn.Module): during the forward pass. Use cases include removing nonlinear activation layers like ReLU for circuits research. - This layer works almost exactly the same way that nn.Indentiy does, except it also - ignores any additional arguments passed to the forward function. Any layer replaced - by SkipLayer must have the same input and output shapes. + This layer works almost exactly the same way that :class:`torch.nn.Identity` does, + except it also ignores any additional arguments passed to the forward function. + Any layer replaced by SkipLayer must have the same input and output shapes. See nn.Identity for more details: https://pytorch.org/docs/stable/generated/torch.nn.Identity.html @@ -355,9 +355,10 @@ class MaxPool2dRelaxed(torch.nn.Module): attributions of spatial posititions can be estimated using the rate at which increasing the neuron affects the output classes. - This layer peforms a MaxPool2d operation on the input, while using an equivalent - AvgPool2d layer to compute the gradient. This means that the forward pass returns - nn.MaxPool2d(input) while the backward pass uses nn.AvgPool2d(input). + This layer peforms a :class:`torch.nn.MaxPool2d` operation on the input, while + using an equivalent :class:`torch.nn.AvgPool2d` layer to compute the gradient. 
+ This means that the forward pass returns ``nn.MaxPool2d(input)`` while the + backward pass uses ``nn.AvgPool2d(input)``. Carter, et al., "Activation Atlas", Distill, 2019. https://distill.pub/2019/activation-atlas/ From 199509ef6d091abb31b854c50c21606ca851064e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:07:50 -0600 Subject: [PATCH 084/174] Improve docstring type formatting --- captum/optim/_core/optimization.py | 44 +++++++++---------- captum/optim/_param/image/images.py | 8 ++-- captum/optim/models/_common.py | 24 +++++----- .../models/_image/inception_v1_places365.py | 10 ++--- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 5636b63dbf..8b41e757fa 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -1,5 +1,3 @@ -"""captum.optim.optimization.""" - import warnings from typing import Callable, Iterable, Optional @@ -49,20 +47,6 @@ class InputOptimization(Objective, Parameterized): >>> obj = opt.InputOptimization(model, loss_fn, image, transform) >>> history = obj.optimize(opt.optimization.n_steps(512)) >>> image().show(figsize=(10, 10)) # Display results - - Instance variables that be used in the optimize function and StopCriteria - functions: - - :ivar model: initial value (nn.Module): The given model instance given when - initializing ``InputOptimization``. - :ivar input_param: initial value (InputParameterization): The given input - parameterization instance given when initializing ``InputOptimization``. - :ivar loss_function: initial value (Loss): The given composable loss instance - given when initializing ``InputOptimization``. - :ivar transform: initial value (nn.Module): The given transform instance given - when initializing ``InputOptimization``. If it was set to ``None`` during - initialization, then an instance of :class:`torch.nn.Identity` will be - returned. 
""" def __init__( @@ -76,12 +60,28 @@ def __init__( Args: model (nn.Module, optional): The reference to PyTorch model instance. - loss_function (callable): The loss function to minimize during - optimization. + loss_function (callable): The :mod:`.loss` objective instance to minimize + during optimization. input_param (nn.Module, optional): A module that generates an input, consumed by the model. transform (nn.Module, optional): A module that transforms or preprocesses the input before being passed to the model. + + Instance variables that be used in the :func:`.optimize` function and + StopCriteria functions: + + Attributes: + + model (torch.nn.Module): The given model instance given when initializing + ``InputOptimization``. If ``model`` was set to ``None`` during + initialization, then an instance of :class:`torch.nn.Identity` will be + returned. + input_param (InputParameterization): The given input parameterization + instance given when initializing ``InputOptimization``. + loss_function (Loss): The composable :mod:`.loss` instance given when + initializing ``InputOptimization``. + transform (torch.nn.Module): The given transform instance given when + initializing ``InputOptimization``. """ self.model = model or nn.Identity() # Grab targets from loss_function @@ -141,8 +141,8 @@ def targets(self, value: Iterable[nn.Module]) -> None: def parameters(self) -> Iterable[nn.Parameter]: """ Returns: - parameters (iterable of nn.Parameter): An iterable of parameters in the - input parameterization. + parameters (iterable of torch.nn.Parameter): An iterable of parameters in + the input parameterization. """ return self.input_param.parameters() @@ -164,10 +164,10 @@ def optimize( optimizer (torch.optim.Optimizer, optional): A ``torch.optim.Optimizer`` instance to use for optimizing the input based on the loss function. 
Default: ``torch.optim.Adam`` - loss_summarize_fn (Callable, optional): The function to use for summarizing + loss_summarize_fn (callable, optional): The function to use for summarizing tensor outputs from loss functions. Default: ``default_loss_summarize`` - lr: (float, optional): If no optimizer is given, then lr is used as the + lr (float, optional): If no optimizer is given, then lr is used as the learning rate for the Adam optimizer. Default: ``0.025`` diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index ee50396572..16e5f625e0 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -22,8 +22,8 @@ class ImageTensor(torch.Tensor): r""" - A subclass of torch.Tensor that provides functions for easy loading, saving, and - displaying image tensors. + A subclass of :class:`torch.Tensor` that provides functions for easy loading, + saving, and displaying image tensors. Alias: ``captum.optim.ImageTensor`` @@ -138,8 +138,8 @@ def show( Args: - figsize (Tuple[int, int], optional): height & width to use - for displaying the ``ImageTensor`` figure. + figsize (tuple of int, optional): The height & width to use for displaying + the ``ImageTensor`` figure, in the format of: (height, width). Default: ``None`` scale (float, optional): Value to multiply the ``ImageTensor`` by so that it's value range is [0-255] for display. diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 3032bc4cf6..8b6695b33b 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -18,7 +18,7 @@ def get_model_layers(model: nn.Module) -> List[str]: model (nn.Module): A PyTorch model or module instance to collect layers from. Returns: - model_layers (List[str]): A list of hookable layers in the model. + model_layers (list of str): A list of hookable layers in the model. 
""" layers = [] @@ -101,16 +101,16 @@ def replace_layers( Args: - model: (nn.Module): A PyTorch model instance. - layer1: (Type[nn.Module]): The layer class that you want to transfer + model (nn.Module): A PyTorch model instance. + layer1 (Type[nn.Module]): The layer class that you want to transfer initialization variables from. - layer2: (Type[nn.Module]): The layer class to create with the variables + layer2 (Type[nn.Module]): The layer class to create with the variables from ``layer1``. transfer_vars (bool, optional): Whether or not to try and copy initialization variables from ``layer1`` instances to the replacement ``layer2`` instances. Default: ``False`` - kwargs: (Any, optional): Any additional variables to use when creating + kwargs (Any, optional): Any additional variables to use when creating the new layer. """ @@ -134,11 +134,11 @@ def _transfer_layer_vars( Args: - layer1: (nn.Module): A layer instance that you want to transfer + layer1 (nn.Module): A layer instance that you want to transfer initialization variables from. - layer2: (nn.Module): The layer class to create with the variables + layer2 (nn.Module): The layer class to create with the variables from of layer1. - kwargs: (Any, optional): Any additional variables to use when creating + kwargs (Any, optional): Any additional variables to use when creating the new layer. Returns: @@ -265,7 +265,7 @@ def collect_activations( Args: model (nn.Module): A PyTorch model instance. - targets (nn.Module or List[nn.Module]): One or more layer targets for the + targets (nn.Module or list of nn.Module): One or more layer targets for the given model. model_input (torch.Tensor or tuple of torch.Tensor, optional): Optionally provide an input tensor to use when collecting the target activations. @@ -333,7 +333,7 @@ def skip_layers( Args: model (nn.Module): A PyTorch model instance. 
- layers (nn.Module or List[nn.Module]): The layer class type to replace in the + layers (nn.Module or list of nn.Module): The layer class type to replace in the model. """ if not hasattr(layers, "__iter__"): @@ -382,8 +382,8 @@ def __init__( """ Args: - kernel_size (int or tuple of int): The size of the window to perform max & - average pooling with. + kernel_size (int or tuple of int): The size of the window to perform max + and average pooling with. stride (int or tuple of int, optional): The stride window size to use. Default: ``None`` padding (int or tuple of int): The amount of zero padding to add to both diff --git a/captum/optim/models/_image/inception_v1_places365.py b/captum/optim/models/_image/inception_v1_places365.py index 81bb7b98c1..62a6834e16 100644 --- a/captum/optim/models/_image/inception_v1_places365.py +++ b/captum/optim/models/_image/inception_v1_places365.py @@ -39,7 +39,7 @@ def googlenet_places365( model_path (str, optional): Optional path for the InceptionV1 model file. Default: ``None`` replace_relus_with_redirectedrelu (bool, optional): If ``True``, return - pretrained model with Redirected ReLU in place of ReLU layers. + pretrained model with :class:`.RedirectedReLU` in place of ReLU layers. Default: *``True``* when pretrained is True otherwise *``False``* use_linear_modules_only (bool, optional): If ``True``, return pretrained model with all nonlinear layers replaced with linear equivalents. @@ -112,7 +112,7 @@ def __init__( according to the method with which it was trained on Places365. Default: ``True`` replace_relus_with_redirectedrelu (bool, optional): If ``True``, return - pretrained model with Redirected ReLU in place of ReLU layers. + pretrained model with :class:`.RedirectedReLU` in place of ReLU layers. Default: ``False`` use_linear_modules_only (bool, optional): If ``True``, return pretrained model with all nonlinear layers replaced with linear equivalents. @@ -306,10 +306,10 @@ def __init__( in the pool branch. 
activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. - Default: ``nn.ReLU`` + Default: :class:`torch.nn.ReLU` p_layer (type of nn.Module, optional): The nn.Module class type to use for pooling layers. - Default: ``nn.MaxPool2d`` + Default: :class:`torch.nn.MaxPool2d` """ super().__init__() self.conv_1x1 = nn.Conv2d( @@ -409,7 +409,7 @@ def __init__( Default: ``1008`` activ (type of nn.Module, optional): The ``nn.Module`` class type to use for activation layers. - Default: ``nn.ReLU`` + Default: :class:`torch.nn.ReLU` """ super().__init__() self.avg_pool = nn.AdaptiveAvgPool2d((4, 4)) From 2480b6925aa3413aec60f1936aceeea67fbe5159 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:18:00 -0600 Subject: [PATCH 085/174] Fix loss docstring type hint formatting --- captum/optim/_core/loss.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 5f10ddafcb..0cc6033fd4 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -869,7 +869,7 @@ def __init__( Default: ``None`` constant (float, optional): Constant value to deduct from the activations. Default: ``0.5`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -919,7 +919,7 @@ def __init__( target (nn.Module): A target layer instance. vec (torch.Tensor): A 1D channel vector with the same size as the channel / feature dimension of the target layer instance. - activation_fn (Callable, optional): An optional activation function to + activation_fn (callable, optional): An optional activation function to apply to the activations before computing the matrix product. 
If set to ``None``, then no activation function will be used. Default: ``torch.nn.functional.relu`` @@ -927,7 +927,7 @@ def __init__( channel dimension to the last dimension before computing the matrix product. Set to ``False`` if the using the channels last format. Default: ``True`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -986,14 +986,14 @@ def __init__( facet_weights (torch.Tensor): Weighting that steers the objective towards a particular theme or concept. These weight values should come from linear probes trained on ``layer_target``. - strength (float, List[float], optional): A single float or list of floats + strength (float, list of float, optional): A single float or list of floats to use for batch dimension weighting. If using a single value, then it will be applied to all batch dimensions equally. Otherwise a list of floats with a shape of: [start, end] should be used for :func:`torch.linspace` to calculate the step values in between. Default is set to ``None`` for no weighting. Default: ``None`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. 
From a9eabfd446f4e1bdeb29e5e93aecc24fbe1fcc1d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:18:33 -0600 Subject: [PATCH 086/174] Fix loss docstring type hint formatting --- captum/optim/_core/loss.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 1d6f26c5d0..5c534613f2 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -209,9 +209,9 @@ def __init__( """ Args: - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -232,7 +232,7 @@ def __init__( def target(self) -> Union[nn.Module, List[nn.Module]]: """ Returns: - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. """ return self._target @@ -241,7 +241,7 @@ def target(self) -> Union[nn.Module, List[nn.Module]]: def batch_index(self) -> Tuple: """ Returns: - batch_index (Tuple[int]): A tuple of batch indices with a format + batch_index (tuple of int): A tuple of batch indices with a format of: (start, end). """ return self._batch_index @@ -333,13 +333,13 @@ def __init__( """ Args: - loss_fn (Callable): A function that takes a dict of captured activations + loss_fn (callable): A function that takes a dict of captured activations with nn.Modules as keys, and then passes those activations through loss objective(s) & math operations. name (str, optional): The name of all composable operations in the instance. 
Default: ``""`` - target (nn.Module or List[nn.Module]): A target nn.Module or list of + target (nn.Module or list of nn.Module): A target nn.Module or list of nn.Module. """ super().__init__(target) @@ -395,7 +395,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -429,7 +429,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. channel_index (int): The index of the channel to optimize for. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -482,7 +482,7 @@ def __init__( unspecified, defaults to center, or one unit up of center for even heights. Default: ``None`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -536,7 +536,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. 
- batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -571,7 +571,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -605,7 +605,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. constant (float): Constant threshold to deduct from the activations. - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -642,7 +642,7 @@ def __init__( Default: ``0.0`` eps (float): Small value to add to L2 prior to sqrt. Default: ``1e-6`` - batch_index (int or List[int], optional): The index or index range of + batch_index (int or list of int, optional): The index or index range of activations to optimize if optimizing a batch of activations. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. @@ -681,7 +681,7 @@ def __init__( target (nn.Module): A target layer, transform, or image parameterization instance to optimize the output of. 
- batch_index (List[int], optional): The index range of activations to + batch_index (list of int, optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` @@ -802,7 +802,7 @@ def __init__( decay_ratio (float): How much to decay penalty as images move apart in the batch. Default: ``2.0`` - batch_index (List[int], optional): The index range of activations to + batch_index (list of int, optional): The index range of activations to optimize. If set to ``None``, defaults to all activations in the batch. Index ranges should be in the format of: [start, end]. Default: ``None`` @@ -1188,9 +1188,10 @@ def sum_loss_list( Args: loss_list (list): A list of loss objectives. - to_scalar_fn (Callable): A function for converting loss objective outputs to - scalar values, in order to prevent size mismatches. - Default: ``torch.mean`` + to_scalar_fn (callable): A function for converting loss objective outputs to + scalar values, in order to prevent size mismatches. Set to + :class:`torch.nn.Identity` for no reduction op. + Default: :func:`torch.mean` Returns: loss_fn (CompositeLoss): A CompositeLoss instance containing all the loss From f2f1d5d3eccacc2201c8c50448f7570ea9f72773 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 18 Jul 2022 15:28:14 -0600 Subject: [PATCH 087/174] Fix bug in skip_layers --- captum/optim/models/_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 8b6695b33b..5f3cb7677a 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -275,7 +275,7 @@ def collect_activations( activ_dict (ModuleOutputMapping): A dictionary of collected activations where the keys are the target layers. 
""" - if not isinstance(targets, list): + if not isinstance(targets, (list, tuple)): targets = [targets] targets = list(dict.fromkeys(targets)) catch_activ = ActivationFetcher(model, targets) @@ -336,7 +336,7 @@ def skip_layers( layers (nn.Module or list of nn.Module): The layer class type to replace in the model. """ - if not hasattr(layers, "__iter__"): + if not isinstance(layers, (tuple, list)): layers = cast(Type[nn.Module], layers) replace_layers(model, layers, SkipLayer) else: From a61461bb424a012f95329ffaebc3f4cf3791d6a9 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 20 Jul 2022 13:34:59 -0600 Subject: [PATCH 088/174] Improve optimization docs --- captum/optim/_core/optimization.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 8b41e757fa..508f235534 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -59,16 +59,19 @@ def __init__( r""" Args: - model (nn.Module, optional): The reference to PyTorch model instance. - loss_function (callable): The :mod:`.loss` objective instance to minimize - during optimization. - input_param (nn.Module, optional): A module that generates an input, - consumed by the model. + model (nn.Module, optional): The reference to PyTorch model instance. Set + to ``None`` for no model instance. + loss_function (callable): The :mod:`Loss <.loss>` objective instance to + minimize during optimization. + input_param (InputParameterization, optional): A module that generates an + input, consumed by the model. Example: An + :mod:`ImageParameterization ` instance. transform (nn.Module, optional): A module that transforms or preprocesses - the input before being passed to the model. + the input before being passed to the model. Set to + :class:`torch.nn.Identity` for no transforms. 
- Instance variables that be used in the :func:`.optimize` function and - StopCriteria functions: + Instance variables that be used in the :func:`InputOptimization.optimize` + function, custom optimization functions, and StopCriteria functions: Attributes: @@ -78,8 +81,8 @@ def __init__( returned. input_param (InputParameterization): The given input parameterization instance given when initializing ``InputOptimization``. - loss_function (Loss): The composable :mod:`.loss` instance given when - initializing ``InputOptimization``. + loss_function (Loss): The composable :mod:`Loss <.loss>` instance given + when initializing ``InputOptimization``. transform (torch.nn.Module): The given transform instance given when initializing ``InputOptimization``. """ From 0ecff5d546ca27dd5f10e9db3f54ecfc6a3c5938 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 20 Jul 2022 14:01:34 -0600 Subject: [PATCH 089/174] Improve InputOptimization.optimize's docstring --- captum/optim/_core/optimization.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 508f235534..541c1d8078 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -156,20 +156,20 @@ def optimize( loss_summarize_fn: Optional[Callable] = None, lr: float = 0.025, ) -> torch.Tensor: - r"""Optimize input based on loss function and objectives. + r"""Optimize input based on loss function and objectives. Args: stop_criteria (StopCriteria, optional): A function that is called every iteration and returns a bool that determines whether to stop the optimization. - Default: ``n_steps(512)`` + Default: :func:`n_steps(512) <.n_steps>` optimizer (torch.optim.Optimizer, optional): A ``torch.optim.Optimizer`` instance to use for optimizing the input based on the loss function. 
- Default: ``torch.optim.Adam`` + Default: :class:`torch.optim.Adam` loss_summarize_fn (callable, optional): The function to use for summarizing tensor outputs from loss functions. - Default: ``default_loss_summarize`` + Default: :func:`.default_loss_summarize` lr (float, optional): If no optimizer is given, then lr is used as the learning rate for the Adam optimizer. Default: ``0.025`` From aeb058d291ea089aecb5b047f0266590c0e88419 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 21 Jul 2022 11:12:15 -0600 Subject: [PATCH 090/174] Improve InputOptimization docs --- captum/optim/_core/optimization.py | 4 ++-- captum/optim/models/_common.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 541c1d8078..0aac927116 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -29,8 +29,8 @@ class InputOptimization(Objective, Parameterized): """ Core function that optimizes an input to maximize a target (aka objective). This is similar to gradient-based methods for adversarial examples, such - as FGSM. The code for this was based on the implementation by the authors of Lucid. - For more details, see the following: + as :class:`FGSM `. The code for this was based on the + implementation by the authors of Lucid. For more details, see the following: * https://github.com/tensorflow/lucid * https://distill.pub/2017/feature-visualization/ diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 5f3cb7677a..8fcc2a978a 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -272,8 +272,8 @@ def collect_activations( Default: ``torch.zeros(1, 3, 224, 224)`` Returns: - activ_dict (ModuleOutputMapping): A dictionary of collected activations where - the keys are the target layers. 
+ activ_dict (dict[nn.Module, torch.Tensor]): A dictionary of collected + activations where the keys are the target layers. """ if not isinstance(targets, (list, tuple)): targets = [targets] From 1faadcda863f6f3a532ba8bb4d5ca2ab2dbbe36e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 21 Jul 2022 11:21:28 -0600 Subject: [PATCH 091/174] Fix doc spacing --- captum/optim/_core/optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 0aac927116..84b3b10a0f 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -156,7 +156,7 @@ def optimize( loss_summarize_fn: Optional[Callable] = None, lr: float = 0.025, ) -> torch.Tensor: - r"""Optimize input based on loss function and objectives. + r"""Optimize input based on loss function and objectives. Args: From a7fb6d941ce3edb6a5bccb3417c2a413185e8fa5 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 21 Jul 2022 12:45:50 -0600 Subject: [PATCH 092/174] Max line length doesn't apply to urls --- captum/optim/_core/loss.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 5c534613f2..c48539151a 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -957,10 +957,8 @@ class AngledNeuronDirection(BaseLoss): https://github.com/tensorflow/lucid/issues/116 This Lucid equivalents of this loss objective can be found here: - https://github.com/tensorflow/lucid/blob/master/notebooks/ - activation-atlas/activation-atlas-simple.ipynb - https://github.com/tensorflow/lucid/blob/master/notebooks/ - activation-atlas/class-activation-atlas.ipynb + https://github.com/tensorflow/lucid/blob/master/notebooks/activation-atlas/activation-atlas-simple.ipynb + https://github.com/tensorflow/lucid/blob/master/notebooks/activation-atlas/class-activation-atlas.ipynb Like the Lucid equivalents, our implementation 
differs slightly from the associated research paper. From 2cfa21be999a54e4b47984ea5dfd74c0f7b000cf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 21 Jul 2022 19:35:16 -0600 Subject: [PATCH 093/174] Add Optim to run_mypy.sh --- scripts/run_mypy.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run_mypy.sh b/scripts/run_mypy.sh index d2f7c8d076..2497be44bc 100755 --- a/scripts/run_mypy.sh +++ b/scripts/run_mypy.sh @@ -5,6 +5,7 @@ set -e # hints. mypy -p captum.attr --ignore-missing-imports --allow-redefinition +mypy -p captum.optim --ignore-missing-imports --allow-redefinition mypy -p captum.insights --ignore-missing-imports --allow-redefinition mypy -p captum.metrics --ignore-missing-imports --allow-redefinition mypy -p captum.robust --ignore-missing-imports --allow-redefinition From 6a40ca60a248d797bae63cb6be6b961a586f9051 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 25 Jul 2022 15:18:57 -0700 Subject: [PATCH 094/174] Don't link directly to arXiv PDF files in algorithms.md (#995) Summary: arXiv offers multiple options for viewing a paper, so it's best to let the user decide which one they want. 
Pull Request resolved: https://github.com/pytorch/captum/pull/995 Reviewed By: Reubend Differential Revision: D38026675 Pulled By: NarineK fbshipit-source-id: 5846218c6cce8d985f79f4375306bf44f5be41b4 --- docs/algorithms.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/algorithms.md b/docs/algorithms.md index 80d50550a0..b06a8aa5f1 100644 --- a/docs/algorithms.md +++ b/docs/algorithms.md @@ -37,7 +37,7 @@ To learn more about GradientSHAP, visit the following resources: - [Original Implementation](https://github.com/slundberg/shap/#deep-learning-example-with-gradientexplainer-tensorflowkeraspytorch-models) ### DeepLIFT -DeepLIFT is a back-propagation based approach that attributes a change to inputs based on the differences between the inputs and corresponding references (or baselines) for non-linear activations. As such, DeepLIFT seeks to explain the difference in the output from reference in terms of the difference in inputs from reference. DeepLIFT uses the concept of multipliers to "blame" specific neurons for the difference in output. The definition of a multiplier is as follows (from [original paper](https://arxiv.org/pdf/1704.02685.pdf)): +DeepLIFT is a back-propagation based approach that attributes a change to inputs based on the differences between the inputs and corresponding references (or baselines) for non-linear activations. As such, DeepLIFT seeks to explain the difference in the output from reference in terms of the difference in inputs from reference. DeepLIFT uses the concept of multipliers to "blame" specific neurons for the difference in output. The definition of a multiplier is as follows (from [original paper](https://arxiv.org/abs/1704.02685)): ![deepLIFT_eq1](/img/deepLIFT_multipliers_eq1.png) *x is the input neuron with a difference from reference Δx, and t is the target neuron with a difference from reference Δt. 
C is then the contribution of Δx to Δt.* @@ -62,7 +62,7 @@ To learn more about DeepLIFT SHAP, visit the following resources: Saliency is a simple approach for computing input attribution, returning the gradient of the output with respect to the input. This approach can be understood as taking a first-order Taylor expansion of the network at the input, and the gradients are simply the coefficients of each feature in the linear representation of the model. The absolute value of these coefficients can be taken to represent feature importance. To learn more about Saliency, visit the following resources: -- [Original paper](https://arxiv.org/pdf/1312.6034.pdf) +- [Original paper](https://arxiv.org/abs/1312.6034) ### Input X Gradient Input X Gradient is an extension of the saliency approach, taking the gradients of the output with respect to the input and multiplying by the input feature values. One intuition for this approach considers a linear model; the gradients are simply the coefficients of each input, and the product of the input with a coefficient corresponds to the total contribution of the feature to the linear model's output. @@ -141,17 +141,17 @@ Conductance combines the neuron activation with the partial derivatives of both Conductance builds on Integrated Gradients (IG) by looking at the flow of IG attribution which occurs through the hidden neuron. The formal definition of total conductance of a hidden neuron *y* (from the [original paper](https://arxiv.org/abs/1805.12233)) is as follows: ![conductance_eq1](/img/conductance_eq_1.png) -For more efficient computation of layer conductance, we use the idea presented in this [paper](https://arxiv.org/pdf/1807.09946.pdf) to avoid computing the gradient of each neuron with respect to the input. +For more efficient computation of layer conductance, we use the idea presented in this [paper](https://arxiv.org/abs/1807.09946) to avoid computing the gradient of each neuron with respect to the input. 
To learn more about Conductance, visit the following resources: - [Original Paper](https://arxiv.org/abs/1805.12233) -- [Computationally Efficient Measures of Internal Neuron Importance](https://arxiv.org/pdf/1807.09946.pdf) +- [Computationally Efficient Measures of Internal Neuron Importance](https://arxiv.org/abs/1807.09946) ### Internal Influence Internal Influence approximates the integral of gradients with respect to a particular layer along the path from a baseline input to the given input. This method is similar to applying integrated gradients, integrating the gradient with respect to the layer (rather than the input). To learn more about Internal Influence, visit the following resources: -- [Original Paper](https://arxiv.org/pdf/1802.03788.pdf) +- [Original Paper](https://arxiv.org/abs/1802.03788) ### Layer Activation Layer Activation is a simple approach for computing layer attribution, returning the activation of each neuron in the identified layer. @@ -208,7 +208,7 @@ Note that based on this definition, summing the neuron conductance (over all inp To learn more about Conductance, visit the following resources: - [Original Paper](https://arxiv.org/abs/1805.12233) -- [Computationally Efficient Measures of Internal Neuron Importance](https://arxiv.org/pdf/1807.09946.pdf) +- [Computationally Efficient Measures of Internal Neuron Importance](https://arxiv.org/abs/1807.09946) ### Neuron Gradient Neuron gradient is the analog of the saliency method for a particular neuron in a network. It simply computes the gradient of the neuron output with respect to the model input. Like Saliency, this approach can be understood as taking a first-order Taylor expansion of the neuron's output at the given input, and the gradients correspond to the coefficients of each feature in the linear representation of the model. 
@@ -259,9 +259,9 @@ To learn more about Noise Tunnel methods, visit the following resources: Infidelity measures the mean squared error between model explanations in the magnitudes of input perturbations and predictor function's changes to those input perturbtaions. Infidelity is defined as follows: ![infidelity_eq](/img/infidelity_eq.png) It is derived from the completeness property of well-known attribution algorithms, such as Integrated Gradients, and is a computationally more efficient and generalized notion of Sensitivy-n. The latter measures correlations between the sum of the attributions and the differences of the predictor function at its input and fixed baseline. More details about the Sensitivity-n can be found here: -https://arxiv.org/pdf/1711.06104.pdfs +https://arxiv.org/abs/1711.06104 More details about infidelity measure can be found here: -- [Original paper](https://arxiv.org/pdf/1901.09392.pdf) +- [Original paper](https://arxiv.org/abs/1901.09392) ### Sensitivity Sensitivity measures the degree of explanation changes to subtle input perturbations using Monte Carlo sampling-based approximation and is defined @@ -270,4 +270,4 @@ as follows: In order to approximate sensitivity measure, by default, we sample from a sub-space of an L-Infinity ball with a default radius. The users can modify both the radius of the ball and the sampling function. More details about sensitivity measure can be found here: -- [Original paper](https://arxiv.org/pdf/1901.09392.pdf) +- [Original paper](https://arxiv.org/abs/1901.09392) From 65b4a841ee369d7a9e457925d0cb93a26470422c Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 25 Jul 2022 15:40:33 -0700 Subject: [PATCH 095/174] Remove the `insights` module from the main `__init__.py` file (#992) Summary: Fixes: https://github.com/pytorch/captum/issues/988 According the `setup.py` and the `README`, users wishing to use insights need to use the custom install option provided or install the modules separately. 
https://github.com/pytorch/captum/blob/master/setup.py#L54, https://github.com/pytorch/captum/blob/master/README.md#installation Therefore the insights module should not be loaded in the `__init__.py` file, and users will have to call it like before the changes proposed in https://github.com/pytorch/captum/pull/912, and added to the master branch in https://github.com/pytorch/captum/commit/9305b109417ca24ee6893075e03f3da241e59252 Pull Request resolved: https://github.com/pytorch/captum/pull/992 Reviewed By: Reubend Differential Revision: D38026733 Pulled By: NarineK fbshipit-source-id: d9bf407f461c2c1381291480654b8fc6923579c0 --- captum/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/captum/__init__.py b/captum/__init__.py index fda440d4f5..c433fc4a4d 100644 --- a/captum/__init__.py +++ b/captum/__init__.py @@ -2,7 +2,6 @@ import captum.attr as attr # noqa import captum.concept as concept # noqa import captum.influence as influence # noqa -import captum.insights as insights # noqa import captum.log as log # noqa import captum.metrics as metrics # noqa import captum.robust as robust # noqa From 9e1538eee473b34072eb8e0c99b86fd5ba712f12 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Tue, 26 Jul 2022 11:10:16 -0700 Subject: [PATCH 096/174] Fix some docstrings (#996) Summary: The "optional" part should come last. 
Pull Request resolved: https://github.com/pytorch/captum/pull/996 Reviewed By: 99warriors, Reubend Differential Revision: D38026646 Pulled By: NarineK fbshipit-source-id: 6648b3a2183dd4c8d29956e356f1ac5cd90d23cd --- captum/attr/_core/lime.py | 6 +++--- captum/robust/_core/metrics/attack_comparator.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/captum/attr/_core/lime.py b/captum/attr/_core/lime.py index 520251ce53..f5ad7877bc 100644 --- a/captum/attr/_core/lime.py +++ b/captum/attr/_core/lime.py @@ -734,7 +734,7 @@ def __init__( forward_func (callable): The forward function of the model or any modification of it - interpretable_model (optional, Model): Model object to train + interpretable_model (Model, optional): Model object to train interpretable model. This argument is optional and defaults to SkLearnLasso(alpha=0.01), @@ -760,7 +760,7 @@ def __init__( Note that calling fit multiple times should retrain the interpretable model, each attribution call reuses the same given interpretable model object. - similarity_func (optional, callable): Function which takes a single sample + similarity_func (callable, optional): Function which takes a single sample along with its corresponding interpretable representation and returns the weight of the interpretable sample for training the interpretable model. @@ -793,7 +793,7 @@ def __init__( kwargs includes baselines, feature_mask, num_interp_features (integer, determined from feature mask). - perturb_func (optional, callable): Function which returns a single + perturb_func (callable, optional): Function which returns a single sampled input, which is a binary vector of length num_interp_features, or a generator of such tensors. 
diff --git a/captum/robust/_core/metrics/attack_comparator.py b/captum/robust/_core/metrics/attack_comparator.py index 57b03e8f18..b9ebb59ad6 100644 --- a/captum/robust/_core/metrics/attack_comparator.py +++ b/captum/robust/_core/metrics/attack_comparator.py @@ -118,7 +118,7 @@ def add_attack( or any other perturbation or attack function such as a torchvision transform. - name (optional, str): Name or identifier for attack, used as key for + name (str, optional): Name or identifier for attack, used as key for attack results. This defaults to attack.__class__.__name__ if not provided and must be unique for all added attacks. From 1c50b87007178466f2fa81c428e1e2a40fe0a860 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 08:56:38 -0600 Subject: [PATCH 097/174] Fix grammar --- captum/optim/_utils/image/dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 5319e4b9a6..7f03129ac7 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -57,8 +57,8 @@ def dataset_cov_matrix( dataloader instance. show_progress (bool, optional): Whether or not to display a tqdm progress bar. Default: ``False`` - device (torch.device, optional): The PyTorch device to use for for calculating - the cov matrix. + device (torch.device, optional): The PyTorch device to use for calculating the + cov matrix. Default: ``torch.device("cpu")`` Returns: @@ -148,8 +148,8 @@ def dataset_klt_matrix( Default: ``False`` show_progress (bool, optional): Whether or not to display a tqdm progress bar. Default: ``False`` - device (torch.device, optional): The PyTorch device to use for for calculating - the cov matrix. + device (torch.device, optional): The PyTorch device to use for calculating the + cov matrix. 
Default: ``torch.device("cpu")`` Returns: From 27b702ed7bade5c9b6b9d09bd5864d10c0701acf Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 09:30:20 -0600 Subject: [PATCH 098/174] Fix spelling --- captum/optim/models/_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 8fcc2a978a..2ba15cfa0d 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -198,7 +198,7 @@ def __init__( kernel points. Default: ``1`` groups (int, optional): Number of blocked connections from input channels - to output channels. Both in_channels and out_channels must be divisable + to output channels. Both in_channels and out_channels must be divisible by groups. Default: ``1`` bias (bool, optional): Whether or not to apply a learnable bias to the From 7924b87d3f1ee45d27fc27c32697317a28a57fe1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 09:30:51 -0600 Subject: [PATCH 099/174] Remove Optim from run_mypy.sh for now --- scripts/run_mypy.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run_mypy.sh b/scripts/run_mypy.sh index 2497be44bc..d2f7c8d076 100755 --- a/scripts/run_mypy.sh +++ b/scripts/run_mypy.sh @@ -5,7 +5,6 @@ set -e # hints. 
mypy -p captum.attr --ignore-missing-imports --allow-redefinition -mypy -p captum.optim --ignore-missing-imports --allow-redefinition mypy -p captum.insights --ignore-missing-imports --allow-redefinition mypy -p captum.metrics --ignore-missing-imports --allow-redefinition mypy -p captum.robust --ignore-missing-imports --allow-redefinition From 3a2194fc158c930252dac326dd8f7c13a24a1a44 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 11:28:47 -0600 Subject: [PATCH 100/174] Remove loss_wrapper tests --- tests/optim/core/test_loss.py | 212 +++++++++++++++------------------- 1 file changed, 92 insertions(+), 120 deletions(-) diff --git a/tests/optim/core/test_loss.py b/tests/optim/core/test_loss.py index cbf99912d0..92f1bd004e 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -42,6 +42,86 @@ def get_loss_value( return loss(module_outputs).detach() +class TestModuleOP(BaseTest): + def test_module_op_loss_unary_op(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping ModuleOP unary op test due to insufficient Torch" + + " version." + ) + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.module_op(loss, None, operator.neg) + + expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" + self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = -torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS]).sum().item() + self.assertEqual(output, expected) + + def test_module_op_loss_num_add(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping ModuleOP loss add num test due to insufficient Torch" + + " version." 
+ ) + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.module_op(loss, 1.0, operator.add) + + expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" + self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = torch.tensor([CHANNEL_ACTIVATION_0_LOSS]) + 1.0 + self.assertEqual(output, expected.item()) + + def test_module_op_loss_loss_add(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping ModuleOP Loss add Loss test due to insufficient Torch" + + " version." + ) + model = BasicModel_ConvNet_Optim() + loss1 = opt_loss.ChannelActivation(model.layer, 0) + loss2 = opt_loss.ChannelActivation(model.layer, 1) + composed_loss = opt_loss.module_op(loss1, loss2, operator.add) + + expected_name = ( + "Compose(ChannelActivation [Conv2d(3, 2, ke..., 0], " + + "ChannelActivation [Conv2d(3, 2, ke..., 1])" + ) + self.assertEqual(composed_loss.__name__, expected_name) + output = get_loss_value(model, composed_loss) + expected = ( + torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) + .sum() + .item() + ) + self.assertEqual(output, expected) + + def test_module_op_loss_pow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + loss = opt_loss.ChannelActivation(model.layer, 0) + opt_loss.module_op(loss, "string", operator.pow) # type: ignore + + +class TestRModuleOP(BaseTest): + def test_module_op_loss_num_div(self) -> None: + model = BasicModel_ConvNet_Optim() + loss = opt_loss.ChannelActivation(model.layer, 0) + composed_loss = opt_loss.rmodule_op(loss, 1.0, operator.pow) + + output = get_loss_value(model, composed_loss) + self.assertEqual(output, 1.0**CHANNEL_ACTIVATION_0_LOSS) + + def test_rmodule_op_loss_pow_error(self) -> None: + model = BasicModel_ConvNet_Optim() + with self.assertRaises(TypeError): + loss = 
opt_loss.ChannelActivation(model.layer, 0) + opt_loss.rmodule_op(loss, "string", operator.pow) # type: ignore + + class TestDeepDream(BaseTest): def test_deepdream(self) -> None: model = BasicModel_ConvNet_Optim() @@ -92,6 +172,18 @@ def test_layer_activation_batch_index(self) -> None: self, output, model_input[batch_index : batch_index + 1], delta=0.0 ) + def test_layer_activation_batch_index_negative(self) -> None: + model = torch.nn.Identity() + batch_index = -2 + loss = opt.loss.LayerActivation(model, batch_index=batch_index) + + model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() + output = get_loss_value(model, loss, model_input) + self.assertEqual(loss.batch_index, (batch_index, batch_index + 1)) + assertTensorAlmostEqual( + self, output, model_input[batch_index : batch_index + 1], delta=0.0 + ) + class TestChannelActivation(BaseTest): def test_channel_activation_init(self) -> None: @@ -772,128 +864,8 @@ def test_sum_loss_list_identity(self) -> None: self.assertEqual(out.sum().item(), 30000.0) -class TestModuleOP(BaseTest): - def test_module_op_loss_unary_op(self) -> None: - if version.parse(torch.__version__) <= version.parse("1.6.0"): - raise unittest.SkipTest( - "Skipping ModuleOP unary op test due to insufficient Torch" - + " version." - ) - model = BasicModel_ConvNet_Optim() - loss = opt_loss.ChannelActivation(model.layer, 0) - composed_loss = opt_loss.module_op(loss, None, operator.neg) - - expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" - self.assertEqual(composed_loss.__name__, expected_name) - output = get_loss_value(model, composed_loss) - expected = -torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS]).sum().item() - self.assertEqual(output, expected) - - def test_module_op_loss_num_add(self) -> None: - if version.parse(torch.__version__) <= version.parse("1.6.0"): - raise unittest.SkipTest( - "Skipping ModuleOP loss add num test due to insufficient Torch" - + " version." 
- ) - model = BasicModel_ConvNet_Optim() - loss = opt_loss.ChannelActivation(model.layer, 0) - composed_loss = opt_loss.module_op(loss, 1.0, operator.add) - - expected_name = "ChannelActivation [Conv2d(3, 2, ke..., 0]" - self.assertEqual(composed_loss.__name__, expected_name) - output = get_loss_value(model, composed_loss) - expected = torch.tensor([CHANNEL_ACTIVATION_0_LOSS]) + 1.0 - self.assertEqual(output, expected.item()) - - def test_module_op_loss_loss_add(self) -> None: - if version.parse(torch.__version__) <= version.parse("1.6.0"): - raise unittest.SkipTest( - "Skipping ModuleOP Loss add Loss test due to insufficient Torch" - + " version." - ) - model = BasicModel_ConvNet_Optim() - loss1 = opt_loss.ChannelActivation(model.layer, 0) - loss2 = opt_loss.ChannelActivation(model.layer, 1) - composed_loss = opt_loss.module_op(loss1, loss2, operator.add) - - expected_name = ( - "Compose(ChannelActivation [Conv2d(3, 2, ke..., 0], " - + "ChannelActivation [Conv2d(3, 2, ke..., 1])" - ) - self.assertEqual(composed_loss.__name__, expected_name) - output = get_loss_value(model, composed_loss) - expected = ( - torch.as_tensor([CHANNEL_ACTIVATION_0_LOSS, CHANNEL_ACTIVATION_0_LOSS]) - .sum() - .item() - ) - self.assertEqual(output, expected) - - def test_module_op_loss_pow_error(self) -> None: - model = BasicModel_ConvNet_Optim() - with self.assertRaises(TypeError): - loss = opt_loss.ChannelActivation(model.layer, 0) - opt_loss.module_op(loss, "string", operator.pow) # type: ignore - - -class TestRModuleOP(BaseTest): - def test_module_op_loss_num_div(self) -> None: - model = BasicModel_ConvNet_Optim() - loss = opt_loss.ChannelActivation(model.layer, 0) - composed_loss = opt_loss.rmodule_op(loss, 1.0, operator.pow) - - output = get_loss_value(model, composed_loss) - self.assertEqual(output, 1.0**CHANNEL_ACTIVATION_0_LOSS) - - def test_rmodule_op_loss_pow_error(self) -> None: - model = BasicModel_ConvNet_Optim() - with self.assertRaises(TypeError): - loss = 
opt_loss.ChannelActivation(model.layer, 0) - opt_loss.rmodule_op(loss, "string", operator.pow) # type: ignore - - class TestDefaultLossSummarize(BaseTest): def test_default_loss_summarize(self) -> None: x = torch.arange(0, 1 * 3 * 5 * 5).view(1, 3, 5, 5).float() output = opt_loss.default_loss_summarize(x) self.assertEqual(output.item(), -37.0) - - -class TestMakeArgStr(BaseTest): - def test_make_arg_str(self) -> None: - args = {"a": 5, "b": None} - output = opt_loss._make_arg_str(args) - self.assertEqual(output, "{'a': 5, 'b': N...") - args = {"c": torch.nn.Identity, "d": "test"} - output = opt_loss._make_arg_str(args) - self.assertEqual(output, "{'c': None: - @opt_loss.loss_wrapper - class TestClass: - def __init__( - self, - target: torch.nn.Module, - test_var: int, - batch_index: Optional[int] = None, - ) -> None: - self.target = target - self.batch_index = batch_index - self.test_var = test_var - - def __call__(self) -> int: - return self.test_var - - test_module = TestClass(torch.nn.Identity(), test_var=5, batch_index=0) - expected = "TestClass [Identity()]" - self.assertEqual(test_module.__name__, expected) # type: ignore - - test_module = TestClass(torch.nn.Identity(), 5, 0) - expected = "TestClass [Identity(), 5, 0]" - self.assertEqual(test_module.__name__, expected) # type: ignore - - test_module = TestClass(torch.nn.Identity(), 5) - expected = "TestClass [Identity(), 5]" - self.assertEqual(test_module.__name__, expected) # type: ignore From 819a0a8c4083d19235d82901d3bb6a0783b30443 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 11:29:13 -0600 Subject: [PATCH 101/174] Remove `loss_wrapper` --- captum/optim/_core/loss.py | 45 +------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index c48539151a..90f7f36a1d 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -9,20 +9,6 @@ from captum.optim._utils.typing import 
ModuleOutputMapping -def _make_arg_str(arg: Any) -> str: - """ - Args: - - args (Any): A set of arguments to covert to a string. - - Returns: - args (str): The args in str form. - """ - arg = str(arg) - too_big = len(arg) > 15 or "\n" in arg - return arg[:15] + "..." if too_big else arg - - class Loss(ABC): """ Abstract Class to describe loss. @@ -32,6 +18,7 @@ class Loss(ABC): def __init__(self) -> None: super().__init__() + self.__name__ = self.__class__.__name__ @abstractproperty def target(self) -> Union[nn.Module, List[nn.Module]]: @@ -362,22 +349,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return self.loss_fn(targets_to_values) -def loss_wrapper(cls: Any) -> Callable: - """ - Primarily for naming purposes. - """ - - @functools.wraps(cls) - def wrapper(*args, **kwargs) -> object: - obj = cls(*args, **kwargs) - args_str = " [" + ", ".join([_make_arg_str(arg) for arg in args]) + "]" - obj.__name__ = cls.__name__ + args_str - return obj - - return wrapper - - -@loss_wrapper class LayerActivation(BaseLoss): """ Maximize activations at the target layer. @@ -409,7 +380,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations -@loss_wrapper class ChannelActivation(BaseLoss): """ Maximize activations at the target layer and target channel. @@ -451,7 +421,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ] -@loss_wrapper class NeuronActivation(BaseLoss): """ This loss maximizes the activations of a target neuron in the specified channel @@ -509,7 +478,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ] -@loss_wrapper class DeepDream(BaseLoss): """ Maximize 'interestingness' at the target layer. @@ -550,7 +518,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return activations**2 -@loss_wrapper class TotalVariation(BaseLoss): """ Total variation denoising penalty for activations. 
@@ -587,7 +554,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.sum(torch.abs(x_diff)) + torch.sum(torch.abs(y_diff)) -@loss_wrapper class L1(BaseLoss): """ L1 norm of the target layer, generally used as a penalty. @@ -620,7 +586,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.abs(activations - self.constant).sum() -@loss_wrapper class L2(BaseLoss): """ L2 norm of the target layer, generally used as a penalty. @@ -660,7 +625,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return torch.sqrt(self.eps + activations) -@loss_wrapper class Diversity(BaseLoss): """ Use a cosine similarity penalty to extract features from a polysemantic neuron. @@ -709,7 +673,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: ) -@loss_wrapper class ActivationInterpolation(BaseLoss): """ Interpolate between two different layers & channels. @@ -776,7 +739,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return sum_tensor -@loss_wrapper class Alignment(BaseLoss): """ Penalize the L2 distance between tensors in the batch to encourage visual @@ -830,7 +792,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return -sum_tensor -@loss_wrapper class Direction(BaseLoss): """ Visualize a general direction vector. @@ -873,7 +834,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class NeuronDirection(BaseLoss): """ Visualize a single (x, y) position for a direction vector. 
@@ -940,7 +900,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class AngledNeuronDirection(BaseLoss): """ Visualize a direction vector with an optional whitened activation vector to @@ -1039,7 +998,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return dot * torch.clamp(cossims, min=0.1) ** self.cossim_pow -@loss_wrapper class TensorDirection(BaseLoss): """ Visualize a tensor direction vector. @@ -1093,7 +1051,6 @@ def __call__(self, targets_to_values: ModuleOutputMapping) -> torch.Tensor: return _dot_cossim(self.vec, activations, cossim_pow=self.cossim_pow) -@loss_wrapper class ActivationWeights(BaseLoss): """ Apply weights to channels, neurons, or spots in the target. From 61a0be93d6279f3aa09e75e8e1a26c53eed9fbff Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 14:15:44 -0600 Subject: [PATCH 102/174] Fix lint errors --- captum/optim/_core/loss.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 90f7f36a1d..ee47ae0ea5 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -1,7 +1,6 @@ -import functools import operator from abc import ABC, abstractmethod, abstractproperty -from typing import Any, Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -1203,7 +1202,6 @@ def default_loss_summarize(loss_value: torch.Tensor) -> torch.Tensor: __all__ = [ "Loss", - "loss_wrapper", "BaseLoss", "CompositeLoss", "LayerActivation", From 31b5707b251c92552e6fb4cef9be2fc1526c37a9 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 14:16:54 -0600 Subject: [PATCH 103/174] Fix lint errors --- tests/optim/core/test_loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/optim/core/test_loss.py 
b/tests/optim/core/test_loss.py index 92f1bd004e..4818554f2d 100644 --- a/tests/optim/core/test_loss.py +++ b/tests/optim/core/test_loss.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import operator import unittest -from typing import Any, List, Optional, Type, Union +from typing import Any, List, Type, Union import captum.optim._core.loss as opt_loss import torch @@ -175,7 +175,7 @@ def test_layer_activation_batch_index(self) -> None: def test_layer_activation_batch_index_negative(self) -> None: model = torch.nn.Identity() batch_index = -2 - loss = opt.loss.LayerActivation(model, batch_index=batch_index) + loss = opt_loss.LayerActivation(model, batch_index=batch_index) model_input = torch.arange(0, 5 * 3 * 5 * 5).view(5, 3, 5, 5).float() output = get_loss_value(model, loss, model_input) From 07c759363f03cf3ced3e41c702b97ab17d8efcb8 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 18:38:12 -0600 Subject: [PATCH 104/174] Fix Mypy type hints --- captum/optim/models/_common.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 2ba15cfa0d..30c574d60d 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -49,12 +49,12 @@ class RedirectedReLU(torch.autograd.Function): """ @staticmethod - def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: + def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: # type: ignore self.save_for_backward(input_tensor) return input_tensor.clamp(min=0) @staticmethod - def backward(self, grad_output: torch.Tensor) -> torch.Tensor: + def backward(self, grad_output: torch.Tensor) -> torch.Tensor: # type: ignore (input_tensor,) = self.saved_tensors relu_grad = grad_output.clone() relu_grad[input_tensor < 0] = 0 @@ -374,20 +374,25 @@ class MaxPool2dRelaxed(torch.nn.Module): def __init__( self, - kernel_size: Union[int, Tuple[int, ...]], - stride: Optional[Union[int, Tuple[int, ...]]] = 
None, - padding: Union[int, Tuple[int, ...]] = 0, + kernel_size: Union[int, Tuple[int, int]], + stride: Optional[Union[int, Tuple[int, int]]] = None, + padding: Union[int, Tuple[int, int]] = 0, ceil_mode: bool = False, ) -> None: """ Args: kernel_size (int or tuple of int): The size of the window to perform max - and average pooling with. + and average pooling with. Either a single int to use for both the + height & width or a tuple of 2 integers in format of: (height, width). stride (int or tuple of int, optional): The stride window size to use. + Either a single int to use for both the height & width or a tuple of 2 + integers in format of: (height, width). Default: ``None`` padding (int or tuple of int): The amount of zero padding to add to both - sides in the ``nn.MaxPool2d`` & ``nn.AvgPool2d`` modules. + sides in the ``nn.MaxPool2d`` & ``nn.AvgPool2d`` modules. Either a + single int to use for both the height & width or a tuple of 2 integers + in format of: (height, width). Default: ``0`` ceil_mode (bool, optional): Whether to use ceil or floor for creating the output shape. From 16dd3cf4871dcba8c3b92efa42366e80fd41cba2 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 18:39:24 -0600 Subject: [PATCH 105/174] Fix formatting --- captum/optim/models/_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 30c574d60d..8a2c6f7391 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -384,15 +384,15 @@ def __init__( kernel_size (int or tuple of int): The size of the window to perform max and average pooling with. Either a single int to use for both the - height & width or a tuple of 2 integers in format of: (height, width). + height & width or a tuple of 2 integers in format of: (height, width). stride (int or tuple of int, optional): The stride window size to use. 
Either a single int to use for both the height & width or a tuple of 2 - integers in format of: (height, width). + integers in format of: (height, width). Default: ``None`` padding (int or tuple of int): The amount of zero padding to add to both sides in the ``nn.MaxPool2d`` & ``nn.AvgPool2d`` modules. Either a - single int to use for both the height & width or a tuple of 2 integers - in format of: (height, width). + single int to use for both the height & width or a tuple of 2 integers + in format of: (height, width). Default: ``0`` ceil_mode (bool, optional): Whether to use ceil or floor for creating the output shape. From f2f7ea553d63879e6c3336c96f7e7ef55fc344a1 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 27 Jul 2022 19:43:27 -0600 Subject: [PATCH 106/174] Fix typehint mistake --- captum/optim/_core/loss.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index ee47ae0ea5..6ec08391b1 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -806,7 +806,7 @@ def __init__( self, target: nn.Module, vec: torch.Tensor, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ @@ -849,7 +849,7 @@ def __init__( x: Optional[int] = None, y: Optional[int] = None, channel_index: Optional[int] = None, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ @@ -1009,7 +1009,7 @@ def __init__( self, target: nn.Module, vec: torch.Tensor, - cossim_pow: Optional[float] = 0.0, + cossim_pow: float = 0.0, batch_index: Optional[int] = None, ) -> None: """ From 91074968dbd7693af1758785385fb4c759df3fa5 Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Wed, 27 Jul 2022 18:53:42 -0700 Subject: [PATCH 107/174] Fix failing conda (#1000) Summary: Fix failing conda Pull Request resolved: https://github.com/pytorch/captum/pull/1000 Reviewed By: 99warriors Differential 
Revision: D38222553 Pulled By: NarineK fbshipit-source-id: 4e949041297d8caedd0658e6322a4054454ca788 --- scripts/install_via_conda.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/install_via_conda.sh b/scripts/install_via_conda.sh index aad12b91c1..a8e32b8d22 100755 --- a/scripts/install_via_conda.sh +++ b/scripts/install_via_conda.sh @@ -16,6 +16,7 @@ while getopts 'nf' flag; do # update conda # removing due to setuptools error during update #conda update -y -n base -c defaults conda +conda update --all --yes # required to use conda develop conda install -y conda-build From 7e2dbf9e23608fee29896357fb9095a70619a812 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 28 Jul 2022 12:49:01 -0600 Subject: [PATCH 108/174] callable -> Callable --- captum/optim/_core/optimization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 84b3b10a0f..6ce3fb3e13 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -61,7 +61,7 @@ def __init__( model (nn.Module, optional): The reference to PyTorch model instance. Set to ``None`` for no model instance. - loss_function (callable): The :mod:`Loss <.loss>` objective instance to + loss_function (Callable): The :mod:`Loss <.loss>` objective instance to minimize during optimization. input_param (InputParameterization, optional): A module that generates an input, consumed by the model. Example: An @@ -167,7 +167,7 @@ def optimize( optimizer (torch.optim.Optimizer, optional): A ``torch.optim.Optimizer`` instance to use for optimizing the input based on the loss function. Default: :class:`torch.optim.Adam` - loss_summarize_fn (callable, optional): The function to use for summarizing + loss_summarize_fn (Callable, optional): The function to use for summarizing tensor outputs from loss functions. 
Default: :func:`.default_loss_summarize` lr (float, optional): If no optimizer is given, then lr is used as the @@ -213,7 +213,7 @@ def n_steps(n: int, show_progress: bool = True) -> StopCriteria: Default: ``True`` Returns: - StopCriteria (callable): A stop criteria function. + StopCriteria (Callable): A stop criteria function. """ if show_progress: From ca84f7b6d8d2f1315c63d99f1a071df397ddcb80 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 29 Jul 2022 14:11:50 -0600 Subject: [PATCH 109/174] Docstring Improvements --- captum/optim/models/_common.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/captum/optim/models/_common.py b/captum/optim/models/_common.py index 8a2c6f7391..9fa9cda942 100644 --- a/captum/optim/models/_common.py +++ b/captum/optim/models/_common.py @@ -49,12 +49,12 @@ class RedirectedReLU(torch.autograd.Function): """ @staticmethod - def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: # type: ignore + def forward(self, input_tensor: torch.Tensor) -> torch.Tensor: self.save_for_backward(input_tensor) return input_tensor.clamp(min=0) @staticmethod - def backward(self, grad_output: torch.Tensor) -> torch.Tensor: # type: ignore + def backward(self, grad_output: torch.Tensor) -> torch.Tensor: (input_tensor,) = self.saved_tensors relu_grad = grad_output.clone() relu_grad[input_tensor < 0] = 0 @@ -262,6 +262,13 @@ def collect_activations( """ Collect target activations for a model. + Example:: + + >>> model = opt.models.googlenet(pretrained=True) + >>> target = model.mixed4c # Target layer + >>> activ_dict = opt.models.collect_activations(model, target) + >>> activations = activ_dict[target] # Get activations from dict + Args: model (nn.Module): A PyTorch model instance. 
From 1a10252f7be31e02fe6be82065f12adde6cd8203 Mon Sep 17 00:00:00 2001 From: Fulton Wang Date: Sun, 31 Jul 2022 23:10:49 -0700 Subject: [PATCH 110/174] modify tracin self influence helpers (#994) Summary: Pull Request resolved: https://github.com/pytorch/captum/pull/994 change `TracInCP._self_influence_batch_tracincp` and `TracInCP._self_influence_batch_tracincp` `TracInCP._self_influence_batches_tracincp_fast` to be named `self_influence`, which is now public, and now accept a DataLoader yielding batches (as well as a single batch, as before). The modified helper function can be called by external functions to compute self influence. The helper itself is also changed to improve efficiency, by reducing the number of times checkpoints are loaded. The modified helper, despite being able to compute self influence scores for a dataloader yielding batches, still only loads each checkpoint once, per call. This is because the modified helper now has an outer iteration over checkpoints, and an inner iteration over batches (the order of iteration is reversed compared to before). This helper is called by `influence` when running it in self influence mode. The reason we cannot just increase the batch size to reduce the number of checkpoint loadings is that for large models (precisely those for which loading checkpoints is expensive), the model takes up too much memory, so that the batch size cannot be too large. Minor change: for `influence_src_dataset` argument of all `__init__`'s, add description of what assumptions we make of the batches yielded by the dataloader. 
Reviewed By: NarineK Differential Revision: D35603078 fbshipit-source-id: 92915477b98e06efebf84ed6e59bd1f02f25b894 --- captum/influence/_core/tracincp.py | 434 +++++++++++------- .../_core/tracincp_fast_rand_proj.py | 386 ++++++++++------ captum/influence/_utils/common.py | 12 + .../_core/test_tracin_self_influence.py | 70 ++- .../_core/test_tracin_show_progress.py | 217 +++++---- 5 files changed, 739 insertions(+), 380 deletions(-) diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index d5acc2dfef..78fa32738f 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -26,6 +26,7 @@ from captum._utils.progress import progress from captum.influence._core.influence import DataInfluence from captum.influence._utils.common import ( + _format_inputs_dataset, _get_k_most_influential_helper, _gradient_dot_product, _load_flexible_state_dict, @@ -95,7 +96,7 @@ class TracInCPBase(DataInfluence): def __init__( self, model: Module, - influence_src_dataset: Union[Dataset, DataLoader], + train_dataset: Union[Dataset, DataLoader], checkpoints: Union[str, List[str], Iterator], checkpoints_load_func: Callable = _load_flexible_state_dict, loss_fn: Optional[Union[Module, Callable]] = None, @@ -105,7 +106,7 @@ def __init__( Args: model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. - influence_src_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -120,9 +121,15 @@ def __init__( DataLoader used for processing should be as large as possible, but not too large, so that certain intermediate quantities created from a batch still fit in memory. 
Therefore, if - `influence_src_dataset` is a Dataset, `batch_size` should be large. - If `influence_src_dataset` was already a DataLoader to begin with, - it should have been constructed to have a large batch size. + `train_dataset` is a Dataset, `batch_size` should be large. + If `train_dataset` was already a DataLoader to begin with, + it should have been constructed to have a large batch size. It is + assumed that the Dataloader (regardless of whether it is created + from a Pytorch Dataset or not) yields tuples. For a `batch` that is + yielded, of length `L`, it is assumed that the forward function of + `model` accepts `L-1` arguments, and the last element of `batch` is + the label. In other words, `model(*batch[:-1])` gives the output of + `model`, and `batch[-1]` are the labels for the batch. checkpoints (str or List of str or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which @@ -140,12 +147,12 @@ def __init__( loss_fn (Callable, optional): The loss function applied to model. Default: None batch_size (int or None, optional): Batch size of the DataLoader created to - iterate through `influence_src_dataset`, if it is a Dataset. + iterate through `train_dataset`, if it is a Dataset. `batch_size` should be chosen as large as possible so that certain intermediate quantities created from a batch still fit in memory. Specific implementations of `TracInCPBase` will detail the size of the intermediate quantities. `batch_size` must be an int if - `influence_src_dataset` is a Dataset. If `influence_src_dataset` + `train_dataset` is a Dataset. If `train_dataset` is a DataLoader, then `batch_size` is ignored as an argument. 
Default: 1 """ @@ -165,44 +172,80 @@ def __init__( self.loss_fn = loss_fn self.batch_size = batch_size - if not isinstance(influence_src_dataset, DataLoader): + if not isinstance(train_dataset, DataLoader): assert isinstance(batch_size, int), ( - "since the `influence_src_dataset` argument was a `Dataset`, " + "since the `train_dataset` argument was a `Dataset`, " "`batch_size` must be an int." ) - self.influence_src_dataloader = DataLoader( - influence_src_dataset, batch_size, shuffle=False - ) + self.train_dataloader = DataLoader(train_dataset, batch_size, shuffle=False) else: - self.influence_src_dataloader = influence_src_dataset + self.train_dataloader = train_dataset - self.influence_src_dataloader_len: Optional[int] = None + self.train_dataloader_len: Optional[int] = None try: # since we will calculate the number of batches in - # `self.influence_src_dataloader` whenever we use progress bar, calculate + # `self.train_dataloader` whenever we use progress bar, calculate # it once in initialization, for re-use. - self.influence_src_dataloader_len = len(self.influence_src_dataloader) - except AttributeError: - pass + self.train_dataloader_len = len(self.train_dataloader) + except TypeError: + warnings.warn( + "Unable to determine the number of batches in training dataset " + "`train_dataset`. Therefore, if showing the progress of computations, " + "only the number of batches processed can be displayed, and not the " + "percentage completion of the computation, nor any time estimates." + ) @abstractmethod - def _self_influence(self, show_progress: bool = False): + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + ) -> Tensor: """ - Returns: - self influence scores (tensor): 1D tensor containing self influence - scores for all examples in training dataset - `influence_src_dataset`. 
- show_progress (bool, optional): To compute the self influence scores for - all examples in training dataset `influence_src_dataset`, we - compute the self influence scores for each batch. If + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. Therefore, + the computed self influence scores are *not* for the examples in training + dataset `train_dataset` (unlike when computing self influence scores using the + `influence` method). Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. If `show_progress`is true, the progress of this computation will be - displayed. In particular, the number of batches for which self - influence scores have been computed will be displayed. It will - try to use tqdm if available for advanced features (e.g. time - estimation). Otherwise, it will fallback to a simple output of - progress. + displayed. 
In more detail, this computation will iterate over all + checkpoints (provided as the `checkpoints` initialization argument) + in an outer loop, and iterate over all batches that + `inputs_dataset` represents in an inner loop. Therefore, the + total number of (checkpoint, batch) combinations that need to be + iterated over is + (# of checkpoints x # of batches that `inputs_dataset` represents). + If `show_progress` is True, the total progress of both the outer + iteration over checkpoints and the inner iteration over batches is + displayed. It will try to use tqdm if available for advanced + features (e.g. time estimation). Otherwise, it will fallback to a + simple output of progress. Default: False + + Returns: + self_influence_scores (Tensor): This is a 1D tensor containing the self + influence scores of all examples in `inputs_dataset`, regardless of + whether it represents a single batch or a `DataLoader` that yields + batches. """ pass @@ -230,7 +273,7 @@ def _get_k_most_influential( Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `influence_src_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -244,13 +287,13 @@ def _get_k_most_influential( test example. Its dimension is `(inputs_batch_size, k)`, where `inputs_batch_size` is the number of examples in `inputs`. For example, if `proponents==True`, `indices[i][j]` is the index of the - example in training dataset `influence_src_dataset` with the + example in training dataset `train_dataset` with the k-th highest influence score for the j-th example in `inputs`. `indices` is a `torch.long` tensor so that it can directly be used to index other tensors. 
Each row of `influence_scores` contains the influence scores for a different test example, in sorted order. In particular, `influence_scores[i][j]` is the influence score of - example `indices[i][j]` in training dataset `influence_src_dataset` + example `indices[i][j]` in training dataset `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. """ @@ -267,7 +310,7 @@ def _influence( Args: inputs (Tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that - `self.model(*inputs)` produces the predictions for the batch. + `model(*inputs)` produces the predictions for the batch. targets (tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. @@ -275,12 +318,12 @@ def _influence( Returns: influence_scores (tensor): Influence scores over the entire - training dataset `influence_src_dataset`. Dimensionality is + training dataset `train_dataset`. Dimensionality is (inputs_batch_size, src_dataset_size). For example: influence_scores[i][j] = the influence score for the j-th training example to the i-th input example. show_progress (bool, optional): To compute the influence of examples in - training dataset `influence_src_dataset`, we compute the influence + training dataset `train_dataset`, we compute the influence of each batch. If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which influence has been computed will be displayed. It will @@ -307,17 +350,17 @@ def influence( # type: ignore[override] - self influence mode: This mode is used if `inputs` is None. This mode computes the self influence scores for every example in - the training dataset `influence_src_dataset`. + the training dataset `train_dataset`. - influence score mode: This mode is used if `inputs` is not None, and `k` is None. 
This mode computes the influence score of every example in - training dataset `influence_src_dataset` on every example in the test + training dataset `train_dataset` on every example in the test batch represented by `inputs` and `targets`. - k-most influential mode: This mode is used if `inputs` is not None, and `k` is not None, and an int. This mode computes the proponents or opponents of every example in the test batch represented by `inputs` and `targets`. In particular, for each test example in the test batch, this mode computes its proponents (resp. opponents), which are the - indices in the training dataset `influence_src_dataset` of the training + indices in the training dataset `train_dataset` of the training examples with the `k` highest (resp. lowest) influence scores on the test example. Proponents are computed if `proponents` is True. Otherwise, opponents are computed. For each test example, this method @@ -329,12 +372,12 @@ def influence( # type: ignore[override] will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential mode. If the argument `unpack_inputs` is False, the - assumption is that `self.model(inputs)` produces the predictions + assumption is that `model(inputs)` produces the predictions for a batch, and `inputs` can be of any type. Otherwise if the argument `unpack_inputs` is True, the assumption is that - `self.model(*inputs)` produces the predictions for a batch, and + `model(*inputs)` produces the predictions for a batch, and `inputs` will need to be a tuple. In other words, `inputs` will be - unpacked as an argument when passing to `self.model`. + unpacked as an argument when passing to `model`. Default: None targets (tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. 
@@ -354,7 +397,7 @@ def influence( # type: ignore[override] Default: True show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each - batch in the training dataset `influence_src_dataset`, which may + batch in the training dataset `train_dataset`, which may take a long time. If `show_progress`is true, the progress of "training dataset computations" will be displayed. In particular, the number of batches for which computations have been performed @@ -368,29 +411,29 @@ def influence( # type: ignore[override] - self influence mode: if this mode is run (`inputs` is None), returns a 1D tensor of self influence scores over training dataset - `influence_src_dataset`. The length of this tensor is the number of - examples in `influence_src_dataset`, regardless of whether it is a + `train_dataset`. The length of this tensor is the number of + examples in `train_dataset`, regardless of whether it is a Dataset or DataLoader. - influence score mode: if this mode is run (`inputs is not None, `k` is None), returns a 2D tensor `influence_scores` of shape - `(input_size, influence_src_dataset_size)`, where `input_size` is + `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. In other words, + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. In other words, `influence_scores[i][j]` is the influence score of the `j`-th - example in `influence_src_dataset` on the `i`-th example in the + example in `train_dataset` on the `i`-th example in the test batch. - k-most influential mode: if this mode is run (`inputs` is not None, `k` is an int), returns a namedtuple `(indices, influence_scores)`. `indices` is a 2D tensor of shape `(input_size, k)`, where `input_size` is the number of examples in the test batch. 
If computing proponents (resp. opponents), `indices[i][j]` is the - index in training dataset `influence_src_dataset` of the example + index in training dataset `train_dataset` of the example with the `j`-th highest (resp. lowest) influence score (out of the - examples in `influence_src_dataset`) on the `i`-th example in the + examples in `train_dataset`) on the `i`-th example in the test batch. `influence_scores` contains the corresponding influence scores. In particular, `influence_scores[i][j]` is the influence - score of example `indices[i][j]` in `influence_src_dataset` on + score of example `indices[i][j]` in `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. """ @@ -431,7 +474,9 @@ def _influence_route_to_helpers( _inputs = _format_inputs(inputs, unpack_inputs) if inputs is None: - return influence_instance._self_influence(show_progress) + return influence_instance.self_influence( + influence_instance.train_dataloader, show_progress + ) elif k is None: return influence_instance._influence(_inputs, targets, show_progress) else: @@ -444,7 +489,7 @@ class TracInCP(TracInCPBase): def __init__( self, model: Module, - influence_src_dataset: Union[Dataset, DataLoader], + train_dataset: Union[Dataset, DataLoader], checkpoints: Union[str, List[str], Iterator], checkpoints_load_func: Callable = _load_flexible_state_dict, layers: Optional[List[str]] = None, @@ -456,7 +501,7 @@ def __init__( Args: model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. - influence_src_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. 
@@ -471,9 +516,15 @@ def __init__( DataLoader used for processing should be as large as possible, but not too large, so that certain intermediate quantities created from a batch still fit in memory. Therefore, if - `influence_src_dataset` is a Dataset, `batch_size` should be large. - If `influence_src_dataset` was already a DataLoader to begin with, - it should have been constructed to have a large batch size. + `train_dataset` is a Dataset, `batch_size` should be large. + If `train_dataset` was already a DataLoader to begin with, + it should have been constructed to have a large batch size. It is + assumed that the Dataloader (regardless of whether it is created + from a Pytorch Dataset or not) yields tuples. For a `batch` that is + yielded, of length `L`, it is assumed that the forward function of + `model` accepts `L-1` arguments, and the last element of `batch` is + the label. In other words, `model(*batch[:-1])` gives the output of + `model`, and `batch[-1]` are the labels for the batch. checkpoints (str or List of str or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which @@ -507,12 +558,12 @@ def __init__( to "mean", i.e. `loss_fn.reduction = "mean"`. Default: None batch_size (int or None, optional): Batch size of the DataLoader created to - iterate through `influence_src_dataset`, if it is a Dataset. + iterate through `train_dataset`, if it is a Dataset. `batch_size` should be chosen as large as possible so that certain intermediate quantities created from a batch still fit in memory. Specific implementations of `TracInCPBase` will detail the size of the intermediate quantities. `batch_size` must be an int if - `influence_src_dataset` is a Dataset. If `influence_src_dataset` + `train_dataset` is a Dataset. If `train_dataset` is a DataLoader, then `batch_size` is ignored as an argument. 
Default: 1 sample_wise_grads_per_batch (bool, optional): PyTorch's native gradient @@ -539,7 +590,7 @@ def __init__( TracInCPBase.__init__( self, model, - influence_src_dataset, + train_dataset, checkpoints, checkpoints_load_func, loss_fn, @@ -627,17 +678,17 @@ def influence( # type: ignore[override] - self influence mode: This mode is used if `inputs` is None. This mode computes the self influence scores for every example in - the training dataset `influence_src_dataset`. + the training dataset `train_dataset`. - influence score mode: This mode is used if `inputs` is not None, and `k` is None. This mode computes the influence score of every example in - training dataset `influence_src_dataset` on every example in the test + training dataset `train_dataset` on every example in the test batch represented by `inputs` and `targets`. - k-most influential mode: This mode is used if `inputs` is not None, and `k` is not None, and an int. This mode computes the proponents or opponents of every example in the test batch represented by `inputs` and `targets`. In particular, for each test example in the test batch, this mode computes its proponents (resp. opponents), which are the - indices in the training dataset `influence_src_dataset` of the training + indices in the training dataset `train_dataset` of the training examples with the `k` highest (resp. lowest) influence scores on the test example. Proponents are computed if `proponents` is True. Otherwise, opponents are computed. For each test example, this method @@ -649,12 +700,12 @@ def influence( # type: ignore[override] will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential mode. If the argument `unpack_inputs` is False, the - assumption is that `self.model(inputs)` produces the predictions + assumption is that `model(inputs)` produces the predictions for a batch, and `inputs` can be of any type. 
Otherwise if the argument `unpack_inputs` is True, the assumption is that - `self.model(*inputs)` produces the predictions for a batch, and + `model(*inputs)` produces the predictions for a batch, and `inputs` will need to be a tuple. In other words, `inputs` will be - unpacked as an argument when passing to `self.model`. + unpacked as an argument when passing to `model`. Default: None targets (tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. @@ -674,7 +725,7 @@ def influence( # type: ignore[override] Default: True show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each - batch in the training dataset `influence_src_dataset`, which may + batch in the training dataset `train_dataset`, which may take a long time. If `show_progress`is true, the progress of "training dataset computations" will be displayed. In particular, the number of batches for which computations have been performed @@ -688,29 +739,29 @@ def influence( # type: ignore[override] - self influence mode: if this mode is run (`inputs` is None), returns a 1D tensor of self influence scores over training dataset - `influence_src_dataset`. The length of this tensor is the number of - examples in `influence_src_dataset`, regardless of whether it is a + `train_dataset`. The length of this tensor is the number of + examples in `train_dataset`, regardless of whether it is a Dataset or DataLoader. - influence score mode: if this mode is run (`inputs is not None, `k` is None), returns a 2D tensor `influence_scores` of shape - `(input_size, influence_src_dataset_size)`, where `input_size` is + `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. 
In other words, + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. In other words, `influence_scores[i][j]` is the influence score of the `j`-th - example in `influence_src_dataset` on the `i`-th example in the + example in `train_dataset` on the `i`-th example in the test batch. - k-most influential mode: if this mode is run (`inputs` is not None, `k` is an int), returns a namedtuple `(indices, influence_scores)`. `indices` is a 2D tensor of shape `(input_size, k)`, where `input_size` is the number of examples in the test batch. If computing proponents (resp. opponents), `indices[i][j]` is the - index in training dataset `influence_src_dataset` of the example + index in training dataset `train_dataset` of the example with the `j`-th highest (resp. lowest) influence score (out of the - examples in `influence_src_dataset`) on the `i`-th example in the + examples in `train_dataset`) on the `i`-th example in the test batch. `influence_scores` contains the corresponding influence scores. In particular, `influence_scores[i][j]` is the influence - score of example `indices[i][j]` in `influence_src_dataset` on + score of example `indices[i][j]` in `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. """ @@ -769,7 +820,7 @@ def _influence( show_progress: bool = False, ) -> Tensor: r""" - Computes the influence of examples in training dataset `influence_src_dataset` + Computes the influence of examples in training dataset `train_dataset` on the examples in the test batch represented by `inputs` and `targets`. This implementation does not require knowing the number of training examples in advance. Instead, the number of training examples is inferred from the @@ -778,12 +829,12 @@ def _influence( Args: inputs (Tuple of Any): A test batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that - `self.model(*inputs)` produces the predictions for the batch. 
+ `model(*inputs)` produces the predictions for the batch. targets (tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None show_progress (bool, optional): To compute the influence of examples in - training dataset `influence_src_dataset`, we compute the influence + training dataset `train_dataset`, we compute the influence of each batch. If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which influence has been computed will be displayed. It will @@ -794,29 +845,29 @@ def _influence( Returns: influence_scores (tensor): Influence scores from the TracInCP method. - Its shape is `(input_size, influence_src_dataset_size)`, where `input_size` + Its shape is `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. For example: + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. For example: `influence_scores[i][j]` is the influence score for the j-th training example to the i-th input example. 
""" - influence_src_dataloader = self.influence_src_dataloader + train_dataloader = self.train_dataloader if show_progress: - influence_src_dataloader = progress( - influence_src_dataloader, + train_dataloader = progress( + train_dataloader, desc=( f"Using {self.get_name()} to compute " "influence for training batches" ), - total=self.influence_src_dataloader_len, + total=self.train_dataloader_len, ) return torch.cat( [ self._influence_batch_tracincp(inputs, targets, batch) - for batch in influence_src_dataloader + for batch in train_dataloader ], dim=1, ) @@ -844,7 +895,7 @@ def _get_k_most_influential( Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `influence_src_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -858,13 +909,13 @@ def _get_k_most_influential( test example. Its dimension is `(inputs_batch_size, k)`, where `inputs_batch_size` is the number of examples in `inputs`. For example, if `proponents==True`, `indices[i][j]` is the index of the - example in training dataset `influence_src_dataset` with the + example in training dataset `train_dataset` with the k-th highest influence score for the j-th example in `inputs`. `indices` is a `torch.long` tensor so that it can directly be used to index other tensors. Each row of `influence_scores` contains the influence scores for a different test example, in sorted order. In particular, `influence_scores[i][j]` is the influence score of - example `indices[i][j]` in training dataset `influence_src_dataset` + example `indices[i][j]` in training dataset `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. 
""" @@ -881,7 +932,7 @@ def _get_k_most_influential( ) return KMostInfluentialResults( *_get_k_most_influential_helper( - self.influence_src_dataloader, + self.train_dataloader, self._influence_batch_tracincp, inputs, targets, @@ -892,86 +943,159 @@ def _get_k_most_influential( ) ) - def _self_influence_batch_tracincp(self, batch: Tuple[Any, ...]): + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + ) -> Tensor: """ - Computes self influence scores for a single batch + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. Therefore, + the computed self influence scores are *not* for the examples in training + dataset `train_dataset` (unlike when computing self influence scores using the + `influence` method). Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. 
If + `show_progress`is true, the progress of this computation will be + displayed. In more detail, this computation will iterate over all + checkpoints (provided as the `checkpoints` initialization argument) + in an outer loop, and iterate over all batches that + `inputs_dataset` represents in an inner loop. Therefore, the + total number of (checkpoint, batch) combinations that need to be + iterated over is + (# of checkpoints x # of batches that `inputs_dataset` represents). + If `show_progress` is True, the total progress of both the outer + iteration over checkpoints and the inner iteration over batches is + displayed. It will try to use tqdm if available for advanced + features (e.g. time estimation). Otherwise, it will fallback to a + simple output of progress. + Default: False + + Returns: + self_influence_scores (Tensor): This is a 1D tensor containing the self + influence scores of all examples in `inputs_dataset`, regardless of + whether it represents a single batch or a `DataLoader` that yields + batches. """ + # If `inputs_dataset` is not a `DataLoader`, turn it into one. + inputs_dataset = _format_inputs_dataset(inputs_dataset) - def get_checkpoint_contribution(checkpoint): + # If `show_progress` is true, create an outer progress bar that keeps track of + # how many checkpoints have been processed + if show_progress: + checkpoints_progress = progress( + desc=( + f"Using {self.get_name()} to compute self " + "influence. Processing checkpoint" + ), + total=len(self.checkpoints), + ) + # Try to determine length of inner progress bar if possible, with a default + # of `None`. + inputs_dataset_len = None + try: + inputs_dataset_len = len(inputs_dataset) + except TypeError: + warnings.warn( + "Unable to determine the number of batches in `inputs_dataset`. 
" + "Therefore, if showing the progress of the computation of self " + "influence scores, only the number of batches processed can be " + "displayed, and not the percentage completion of the computation, " + "nor any time estimates." + ) + def get_checkpoint_contribution(checkpoint): + # This function returns a 1D tensor representing the contribution to the + # self influence score for the given checkpoint, for all batches in + # `inputs_dataset`. The length of the 1D tensor is the total number of + # examples in `inputs_dataset`. assert ( checkpoint is not None ), "None returned from `checkpoints`, cannot load." learning_rate = self.checkpoints_load_func(self.model, checkpoint) - layer_jacobians = self._basic_computation_tracincp(batch[0:-1], batch[-1]) + # This will store a list of the contribution of the self influence score + # from each batch. Each element is a 1D tensor of length batch_size - the + # batch size of each batch in `inputs_dataset` (they do not need to be all + # the same) + checkpoint_contribution = [] + + _inputs_dataset = inputs_dataset + # If `show_progress` is true, create an inner progress bar that keeps track + # of how many batches have been processed for the current checkpoint + if show_progress: + _inputs_dataset = progress( + inputs_dataset, + desc=( + f"Using {self.get_name()} to compute self " + "influence. Processing batch" + ), + total=inputs_dataset_len, + ) - # note that all variables in this function are for an entire batch. - # each `layer_jacobian` in `layer_jacobians` corresponds to a different - # layer. `layer_jacobian` is the jacobian w.r.t to a given layer's - # parameters. if the given layer's parameters are of shape *, then - # `layer_jacobian` is of shape (batch_size, *). for each layer, we need - # the squared jacobian for each example. so we square the jacobian and - # sum over all dimensions except the 0-th (the batch dimension). We then - # sum the contribution over all layers. 
- return ( - torch.sum( - torch.stack( - [ - torch.sum(layer_jacobian.flatten(start_dim=1) ** 2, dim=1) - for layer_jacobian in layer_jacobians - ], + for batch in _inputs_dataset: + + layer_jacobians = self._basic_computation_tracincp( + batch[0:-1], batch[-1] + ) + + # Note that all variables in this function are for an entire batch. + # Each `layer_jacobian` in `layer_jacobians` corresponds to a different + # layer. `layer_jacobian` is the jacobian w.r.t to a given layer's + # parameters. If the given layer's parameters are of shape *, then + # `layer_jacobian` is of shape (batch_size, *). For each layer, we need + # the squared jacobian for each example. So we square the jacobian and + # sum over all dimensions except the 0-th (the batch dimension). We then + # sum the contribution over all layers. + checkpoint_contribution.append( + torch.sum( + torch.stack( + [ + torch.sum( + layer_jacobian.flatten(start_dim=1) ** 2, dim=1 + ) + for layer_jacobian in layer_jacobians + ], + dim=0, + ), dim=0, - ), - dim=0, + ) + * learning_rate ) - * learning_rate - ) - batch_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0]) + # We concatenate the contributions from each batch into a single 1D tensor, + # which represents the contributions for all batches in `inputs_dataset` - for checkpoint in self.checkpoints[1:]: - batch_self_tracin_scores += get_checkpoint_contribution(checkpoint) + if show_progress: + checkpoints_progress.update() - return batch_self_tracin_scores + return torch.cat(checkpoint_contribution, dim=0) - def _self_influence(self, show_progress: bool = False): - """ - Returns: - self influence scores (tensor): 1D tensor containing self influence - scores for all examples in training dataset - `influence_src_dataset`. - show_progress (bool, optional): To compute the self influence scores for - all examples in training dataset `influence_src_dataset`, we - compute the self influence scores for each batch. 
If - `show_progress`is true, the progress of this computation will be - displayed. In particular, the number of batches for which self - influence scores have been computed will be displayed. It will - try to use tqdm if available for advanced features (e.g. time - estimation). Otherwise, it will fallback to a simple output of - progress. - Default: False - """ - influence_src_dataloader = self.influence_src_dataloader + batches_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0]) - if show_progress: - influence_src_dataloader = progress( - influence_src_dataloader, - desc=( - f"Using {self.get_name()} to compute self " - "influence for training batches" - ), - total=self.influence_src_dataloader_len, - ) + # The self influence score for all examples is the sum of contributions from + # each checkpoint + for checkpoint in self.checkpoints[1:]: + batches_self_tracin_scores += get_checkpoint_contribution(checkpoint) - return torch.cat( - [ - self._self_influence_batch_tracincp(batch) - for batch in influence_src_dataloader - ], - dim=0, - ) + return batches_self_tracin_scores def _basic_computation_tracincp( self, @@ -987,7 +1111,7 @@ def _basic_computation_tracincp( inputs (Tuple of Any): A batch of examples, which could be a training batch or test batch, depending which method is the caller. Does not represent labels, which are passed as `targets`. The assumption is - that `self.model(*inputs)` produces the predictions for the batch. + that `model(*inputs)` produces the predictions for the batch. targets (tensor or None): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. 
""" diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index cfbf7b47d4..71fe3b45a0 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -13,6 +13,7 @@ ) from captum.influence._utils.common import ( _DatasetFromList, + _format_inputs_dataset, _get_k_most_influential_helper, _jacobian_loss_wrt_inputs, _load_flexible_state_dict, @@ -77,7 +78,7 @@ def __init__( self, model: Module, final_fc_layer: Union[Module, str], - influence_src_dataset: Union[Dataset, DataLoader], + train_dataset: Union[Dataset, DataLoader], checkpoints: Union[str, List[str], Iterator], checkpoints_load_func: Callable = _load_flexible_state_dict, loss_fn: Optional[Union[Module, Callable]] = None, @@ -93,7 +94,7 @@ def __init__( projection method. Can be either the layer module itself, or the fully qualified name of the layer if it is a defined attribute of the passed `model`. - influence_src_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -108,9 +109,15 @@ def __init__( DataLoader used for processing should be as large as possible, but not too large, so that certain intermediate quantities created from a batch still fit in memory. Therefore, if - `influence_src_dataset` is a Dataset, `batch_size` should be large. - If `influence_src_dataset` was already a DataLoader to begin with, - it should have been constructed to have a large batch size. + `train_dataset` is a Dataset, `batch_size` should be large. + If `train_dataset` was already a DataLoader to begin with, + it should have been constructed to have a large batch size. 
It is + assumed that the Dataloader (regardless of whether it is created + from a Pytorch Dataset or not) yields tuples. For a `batch` that is + yielded, of length `L`, it is assumed that the forward function of + `model` accepts `L-1` arguments, and the last element of `batch` is + the label. In other words, `model(*batch[:-1])` gives the output of + `model`, and `batch[-1]` are the labels for the batch. checkpoints (str or List of str or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which @@ -132,12 +139,12 @@ def __init__( to "mean", i.e. `loss_fn.reduction = "mean"`. Default: None batch_size (int or None, optional): Batch size of the DataLoader created to - iterate through `influence_src_dataset`, if it is a Dataset. + iterate through `train_dataset`, if it is a Dataset. `batch_size` should be chosen as large as possible so that certain intermediate quantities created from a batch still fit in memory. Specific implementations of `TracInCPBase` will detail the size of the intermediate quantities. `batch_size` must be an int if - `influence_src_dataset` is a Dataset. If `influence_src_dataset` + `train_dataset` is a Dataset. If `train_dataset` is a DataLoader, then `batch_size` is ignored as an argument. Default: 1 vectorize (bool, optional): Flag to use experimental vectorize functionality @@ -147,7 +154,7 @@ def __init__( TracInCPBase.__init__( self, model, - influence_src_dataset, + train_dataset, checkpoints, checkpoints_load_func, loss_fn, @@ -206,17 +213,17 @@ def influence( # type: ignore[override] - self influence mode: This mode is used if `inputs` is None. This mode computes the self influence scores for every example in - the training dataset `influence_src_dataset`. + the training dataset `train_dataset`. - influence score mode: This mode is used if `inputs` is not None, and `k` is None. 
This mode computes the influence score of every example in - training dataset `influence_src_dataset` on every example in the test + training dataset `train_dataset` on every example in the test batch represented by `inputs` and `targets`. - k-most influential mode: This mode is used if `inputs` is not None, and `k` is not None, and an int. This mode computes the proponents or opponents of every example in the test batch represented by `inputs` and `targets`. In particular, for each test example in the test batch, this mode computes its proponents (resp. opponents), which are the - indices in the training dataset `influence_src_dataset` of the training + indices in the training dataset `train_dataset` of the training examples with the `k` highest (resp. lowest) influence scores on the test example. Proponents are computed if `proponents` is True. Otherwise, opponents are computed. For each test example, this method @@ -228,12 +235,12 @@ def influence( # type: ignore[override] will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential mode. If the argument `unpack_inputs` is False, the - assumption is that `self.model(inputs)` produces the predictions + assumption is that `model(inputs)` produces the predictions for a batch, and `inputs` can be of any type. Otherwise if the argument `unpack_inputs` is True, the assumption is that - `self.model(*inputs)` produces the predictions for a batch, and + `model(*inputs)` produces the predictions for a batch, and `inputs` will need to be a tuple. In other words, `inputs` will be - unpacked as an argument when passing to `self.model`. + unpacked as an argument when passing to `model`. Default: None targets (tensor, optional): The labels corresponding to the batch `inputs`. 
This method is designed to be applied for a loss function, so @@ -254,7 +261,7 @@ def influence( # type: ignore[override] Default: True show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each - batch in the training dataset `influence_src_dataset`, which may + batch in the training dataset `train_dataset`, which may take a long time. If `show_progress`is true, the progress of "training dataset computations" will be displayed. In particular, the number of batches for which computations have been performed @@ -268,29 +275,29 @@ def influence( # type: ignore[override] - self influence mode: if this mode is run (`inputs` is None), returns a 1D tensor of self influence scores over training dataset - `influence_src_dataset`. The length of this tensor is the number of - examples in `influence_src_dataset`, regardless of whether it is a + `train_dataset`. The length of this tensor is the number of + examples in `train_dataset`, regardless of whether it is a Dataset or DataLoader. - influence score mode: if this mode is run (`inputs is not None, `k` is None), returns a 2D tensor `influence_scores` of shape - `(input_size, influence_src_dataset_size)`, where `input_size` is + `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. In other words, + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. In other words, `influence_scores[i][j]` is the influence score of the `j`-th - example in `influence_src_dataset` on the `i`-th example in the + example in `train_dataset` on the `i`-th example in the test batch. - k-most influential mode: if this mode is run (`inputs` is not None, `k` is an int), returns a namedtuple `(indices, influence_scores)`. 
`indices` is a 2D tensor of shape `(input_size, k)`, where `input_size` is the number of examples in the test batch. If computing proponents (resp. opponents), `indices[i][j]` is the - index in training dataset `influence_src_dataset` of the example + index in training dataset `train_dataset` of the example with the `j`-th highest (resp. lowest) influence score (out of the - examples in `influence_src_dataset`) on the `i`-th example in the + examples in `train_dataset`) on the `i`-th example in the test batch. `influence_scores` contains the corresponding influence scores. In particular, `influence_scores[i][j]` is the influence - score of example `indices[i][j]` in `influence_src_dataset` on + score of example `indices[i][j]` in `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. """ @@ -351,7 +358,7 @@ def _influence( # type: ignore[override] show_progress: bool = False, ) -> Tensor: r""" - Computes the influence of examples in training dataset `influence_src_dataset` + Computes the influence of examples in training dataset `train_dataset` on the examples in the test batch represented by `inputs` and `targets`. This implementation does not require knowing the number of training examples in advance. Instead, the number of training examples is inferred from the @@ -360,12 +367,12 @@ def _influence( # type: ignore[override] Args: inputs (Tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that - `self.model(*inputs)` produces the predictions for the batch. + `model(*inputs)` produces the predictions for the batch. targets (tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. 
show_progress (bool, optional): To compute the influence of examples in - training dataset `influence_src_dataset`, we compute the influence + training dataset `train_dataset`, we compute the influence of each batch. If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which influence has been computed will be displayed. It will @@ -376,31 +383,31 @@ def _influence( # type: ignore[override] Returns: influence_scores (tensor): Influence scores from the TracInCPFast method. - Its shape is `(input_size, influence_src_dataset_size)`, where `input_size` + Its shape is `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. For example: + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. For example: `influence_scores[i][j]` is the influence score for the j-th training example to the i-th input example. 
""" assert targets is not None - influence_src_dataloader = self.influence_src_dataloader + train_dataloader = self.train_dataloader if show_progress: - influence_src_dataloader = progress( - influence_src_dataloader, + train_dataloader = progress( + train_dataloader, desc=( f"Using {self.get_name()} to compute " "influence for training batches" ), - total=self.influence_src_dataloader_len, + total=self.train_dataloader_len, ) return torch.cat( [ self._influence_batch_tracincp_fast(inputs, targets, batch) - for batch in influence_src_dataloader + for batch in train_dataloader ], dim=1, ) @@ -428,7 +435,7 @@ def _get_k_most_influential( # type: ignore[override] Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `influence_src_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress`is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -442,13 +449,13 @@ def _get_k_most_influential( # type: ignore[override] test example. Its dimension is `(inputs_batch_size, k)`, where `inputs_batch_size` is the number of examples in `inputs`. For example, if `proponents==True`, `indices[i][j]` is the index of the - example in training dataset `influence_src_dataset` with the + example in training dataset `train_dataset` with the k-th highest influence score for the j-th example in `inputs`. `indices` is a `torch.long` tensor so that it can directly be used to index other tensors. Each row of `influence_scores` contains the influence scores for a different test example, in sorted order. 
In particular, `influence_scores[i][j]` is the influence score of - example `indices[i][j]` in training dataset `influence_src_dataset` + example `indices[i][j]` in training dataset `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. """ @@ -465,7 +472,7 @@ def _get_k_most_influential( # type: ignore[override] ) return KMostInfluentialResults( *_get_k_most_influential_helper( - self.influence_src_dataloader, + self.train_dataloader, self._influence_batch_tracincp_fast, inputs, targets, @@ -476,72 +483,141 @@ def _get_k_most_influential( # type: ignore[override] ) ) - def _self_influence_batch_tracincp_fast(self, batch: Tuple[Any, ...]): + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + ) -> Tensor: """ - Computes self influence scores for a single batch + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. Therefore, + the computed self influence scores are *not* for the examples in training + dataset `train_dataset` (unlike when computing self influence scores using the + `influence` method). Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. 
This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. If + `show_progress`is true, the progress of this computation will be + displayed. In more detail, this computation will iterate over all + checkpoints (provided as the `checkpoints` initialization argument) + in an outer loop, and iterate over all batches that + `inputs_dataset` represents in an inner loop. Therefore, the + total number of (checkpoint, batch) combinations that need to be + iterated over is + (# of checkpoints x # of batches that `inputs_dataset` represents). + If `show_progress` is True, the total progress of both the outer + iteration over checkpoints and the inner iteration over batches is + displayed. It will try to use tqdm if available for advanced + features (e.g. time estimation). Otherwise, it will fallback to a + simple output of progress. + Default: False + + Returns: + self_influence_scores (Tensor): This is a 1D tensor containing the self + influence scores of all examples in `inputs_dataset`, regardless of + whether it represents a single batch or a `DataLoader` that yields + batches. """ + # If `inputs_dataset` is not a `DataLoader`, turn it into one. + inputs_dataset = _format_inputs_dataset(inputs_dataset) - def get_checkpoint_contribution(checkpoint): + # If `show_progress` is true, create an outer progress bar that keeps track of + # how many checkpoints have been processed + if show_progress: + checkpoints_progress = progress( + desc=( + f"Using {self.get_name()} to compute self " + "influence. Processing checkpoint" + ), + total=len(self.checkpoints), + ) + # Try to determine length of inner progress bar if possible, with a default + # of `None`. 
+ inputs_dataset_len = None + try: + inputs_dataset_len = len(inputs_dataset) + except TypeError: + warnings.warn( + "Unable to determine the number of batches in `inputs_dataset`. " + "Therefore, if showing the progress of the computation of self " + "influence scores, only the number of batches processed can be " + "displayed, and not the percentage completion of the computation, " + "nor any time estimates." + ) + def get_checkpoint_contribution(checkpoint): + # This function returns a 1D tensor representing the contribution to the + # self influence score for the given checkpoint, for all batches in + # `inputs_dataset`. The length of the 1D tensor is the total number of + # examples in `inputs_dataset`. assert ( checkpoint is not None ), "None returned from `checkpoints`, cannot load." learning_rate = self.checkpoints_load_func(self.model, checkpoint) - batch_jacobian, batch_layer_input = _basic_computation_tracincp_fast( - self, batch[0:-1], batch[-1] - ) + # This will store a list of the contribution of the self influence score + # from each batch. Each element is a 1D tensor of length batch_size - the + # batch size of each batch in `inputs_dataset` (they do not need to be all + # the same) + checkpoint_contribution = [] + + _inputs_dataset = inputs_dataset + # If `show_progress` is true, create an inner progress bar that keeps track + # of how many batches have been processed for the current checkpoint + if show_progress: + _inputs_dataset = progress( + inputs_dataset, + desc=( + f"Using {self.get_name()} to compute self " + "influence. 
Processing batch" + ), + total=inputs_dataset_len, + ) - return ( - torch.sum(batch_jacobian**2, dim=1) - * torch.sum(batch_layer_input**2, dim=1) - * learning_rate - ) + for batch in _inputs_dataset: - batch_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0]) + batch_jacobian, batch_layer_input = _basic_computation_tracincp_fast( + self, batch[0:-1], batch[-1] + ) - for checkpoint in self.checkpoints[1:]: - batch_self_tracin_scores += get_checkpoint_contribution(checkpoint) + checkpoint_contribution.append( + torch.sum(batch_jacobian**2, dim=1) + * torch.sum(batch_layer_input**2, dim=1) + * learning_rate + ) - return batch_self_tracin_scores + # We concatenate the contributions from each batch into a single 1D tensor, + # which represents the contributions for all batches in `inputs_dataset` - def _self_influence(self, show_progress: bool = False): - """ - Returns: - self influence scores (tensor): 1D tensor containing self influence - scores for all examples in training dataset - `influence_src_dataset`. - show_progress (bool, optional): To compute the self influence scores for - all examples in training dataset `influence_src_dataset`, we - compute the self influence scores for each batch. If - `show_progress`is true, the progress of this computation will be - displayed. In particular, the number of batches for which self - influence scores have been computed will be displayed. It will - try to use tqdm if available for advanced features (e.g. time - estimation). Otherwise, it will fallback to a simple output of - progress. 
- Default: False - """ - influence_src_dataloader = self.influence_src_dataloader + if show_progress: + checkpoints_progress.update() - if show_progress: - influence_src_dataloader = progress( - influence_src_dataloader, - desc=( - f"Using {self.get_name()} to compute self " - "influence for training batches" - ), - total=self.influence_src_dataloader_len, - ) + return torch.cat(checkpoint_contribution, dim=0) - return torch.cat( - [ - self._self_influence_batch_tracincp_fast(batch) - for batch in influence_src_dataloader - ], - dim=0, - ) + batches_self_tracin_scores = get_checkpoint_contribution(self.checkpoints[0]) + + # The self influence score for all examples is the sum of contributions from + # each checkpoint + for checkpoint in self.checkpoints[1:]: + batches_self_tracin_scores += get_checkpoint_contribution(checkpoint) + + return batches_self_tracin_scores def _basic_computation_tracincp_fast( @@ -564,7 +640,7 @@ def _basic_computation_tracincp_fast( inputs (Tuple of Any): A batch of examples, which could be a training batch or test batch, depending which method is the caller. Does not represent labels, which are passed as `targets`. The assumption is - that `self.model(*inputs)` produces the predictions for the batch. + that `model(*inputs)` produces the predictions for the batch. targets (tensor): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. """ @@ -599,7 +675,7 @@ def __init__( self, model: Module, final_fc_layer: Union[Module, str], - influence_src_dataset: Union[Dataset, DataLoader], + train_dataset: Union[Dataset, DataLoader], checkpoints: Union[str, List[str], Iterator], checkpoints_load_func: Callable = _load_flexible_state_dict, loss_fn: Optional[Union[Module, Callable]] = None, @@ -620,10 +696,10 @@ def __init__( interactive use cases. 
It should not be used if `influence` will only be called once, because to enable fast calls to `influence`, time and memory intensive preprocessing is required in `__init__`. Furthermore, it should not - be used to calculate self influencs scores - `TracInCPFast` should be used + be used to calculate self influence scores - `TracInCPFast` should be used instead for that purpose. To enable interactive analysis, this implementation - saves pre-computed vectors for all training examples in - `influence_src_dataset`. Crucially, the influence score of a training + computes and saves "embedding" vectors for all training examples in + `train_dataset`. Crucially, the influence score of a training example on a test example is simply the dot-product of their corresponding vectors, and proponents / opponents can be found by first storing vectors for training examples in a nearest-neighbor data structure, and then finding the @@ -631,7 +707,7 @@ def __init__( of the TracIn paper). This class should only be used if calls to `influence` to obtain proponents / opponents or influence scores will be made in an "interactive" manner, and there is sufficient memory to store vectors for the - entire `influence_src_dataset`. This is because in order to enable interactive + entire `train_dataset`. This is because in order to enable interactive analysis, this implementation incures overhead in ``__init__` to setup the nearest-neighbors data structure, which is both time and memory intensive, as vectors corresponding to all training examples needed to be stored. To reduce @@ -647,7 +723,7 @@ def __init__( projection method. Can be either the layer module itself, or the fully qualified name of the layer if it is a defined attribute of the passed `model`. 
- influence_src_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -662,9 +738,15 @@ def __init__( DataLoader used for processing should be as large as possible, but not too large, so that certain intermediate quantities created from a batch still fit in memory. Therefore, if - `influence_src_dataset` is a Dataset, `batch_size` should be large. - If `influence_src_dataset` was already a DataLoader to begin with, - it should have been constructed to have a large batch size. + `train_dataset` is a Dataset, `batch_size` should be large. + If `train_dataset` was already a DataLoader to begin with, + it should have been constructed to have a large batch size. It is + assumed that the Dataloader (regardless of whether it is created + from a Pytorch Dataset or not) yields tuples. For a `batch` that is + yielded, of length `L`, it is assumed that the forward function of + `model` accepts `L-1` arguments, and the last element of `batch` is + the label. In other words, `model(*batch[:-1])` gives the output of + `model`, and `batch[-1]` are the labels for the batch. checkpoints (str or List of str or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which @@ -682,12 +764,12 @@ def __init__( `nn.BCELoss(reduction="mean")` is *not* acceptable. Default: None batch_size (int or None, optional): Batch size of the DataLoader created to - iterate through `influence_src_dataset`, if it is a Dataset. + iterate through `train_dataset`, if it is a Dataset. `batch_size` should be chosen as large as possible so that certain intermediate quantities created from a batch still fit in memory. 
Specific implementations of `TracInCPBase` will detail the size of the intermediate quantities. `batch_size` must be an int if - `influence_src_dataset` is a Dataset. If `influence_src_dataset` + `train_dataset` is a Dataset. If `train_dataset` is a DataLoader, then `batch_size` is ignored as an argument. Default: 1 vectorize (bool): Flag to use experimental vectorize functionality @@ -728,7 +810,7 @@ def __init__( self, model, final_fc_layer, - influence_src_dataset, + train_dataset, checkpoints, checkpoints_load_func, loss_fn, @@ -739,7 +821,7 @@ def __init__( warnings.warn( ( "WARNING: Using this implementation stores quantities related to the " - "entire `influence_src_dataset` in memory, and may results in running " + "entire `train_dataset` in memory, and may results in running " "out of memory. If this happens, consider using %s instead, for which " "each call to `influence` to compute influence scores or proponents " "will be slower, but may avoid running out of memory." @@ -755,12 +837,12 @@ def __init__( torch.manual_seed(seed) # for reproducibility self.projection_quantities = self._set_projections_tracincp_fast_rand_proj( - self.influence_src_dataloader, + self.train_dataloader, ) self.src_intermediate_quantities = ( self._get_intermediate_quantities_tracincp_fast_rand_proj( - self.influence_src_dataloader, + self.train_dataloader, self.projection_quantities, ) ) @@ -778,7 +860,7 @@ def _influence( # type: ignore[override] Args: inputs (tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that - `self.model(*inputs)` produces the predictions for the batch. + `model(*inputs)` produces the predictions for the batch. targets (tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. 
@@ -786,9 +868,9 @@ def _influence( # type: ignore[override] Returns: influence_scores (tensor): Influence scores from the TracInCPFastRandProj method. Its shape is - `(input_size, influence_src_dataset_size)`, where `input_size` is the - number of examples in the test batch, and `influence_src_dataset_size` is - the number of examples in training dataset `influence_src_dataset`. For + `(input_size, train_dataset_size)`, where `input_size` is the + number of examples in the test batch, and `train_dataset_size` is + the number of examples in training dataset `train_dataset`. For example, `influence_scores[i][j]` is the influence score for the j-th training example to the i-th input example. """ @@ -831,13 +913,13 @@ def _get_k_most_influential( # type: ignore[override] test example. Its dimension is `(inputs_batch_size, k)`, where `inputs_batch_size` is the number of examples in `inputs`. For example, if `proponents==True`, `indices[i][j]` is the index of the - example in training dataset `influence_src_dataset` with the + example in training dataset `train_dataset` with the k-th highest influence score for the j-th example in `inputs`. `indices` is a `torch.long` tensor so that it can directly be used to index other tensors. Each row of `influence_scores` contains the influence scores for a different test example, in sorted order. In particular, `influence_scores[i][j]` is the influence score of - example `indices[i][j]` in training dataset `influence_src_dataset` + example `indices[i][j]` in training dataset `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. 
""" @@ -860,17 +942,55 @@ def _get_k_most_influential( # type: ignore[override] return KMostInfluentialResults(indices, distances) - def _self_influence(self): + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + ) -> Tensor: """ - NOT IMPLEMENTED - no need to implement `TracInCPFastRandProj._self_influence`, - as `TracInCPFast._self_influence` is sufficient - the latter does not benefit + NOT IMPLEMENTED - no need to implement `TracInCPFastRandProj.self_influence`, + as `TracInCPFast.self_influence` is sufficient - the latter does not benefit from random projections, since no quantities associated with a training example are stored (other than its self influence score) + Computes self influence scores for a single batch or a Pytorch `DataLoader` + that yields batches. Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. 
If + `show_progress`is true, the progress of this computation will be + displayed. In more detail, this computation will iterate over all + checkpoints (provided as the `checkpoints` initialization argument) + and all batches that `inputs_dataset` represents. Therefore, the + total number of (checkpoint, batch) combinations that need to be + iterated over is + (# of checkpoints x # of batches that `inputs_dataset` represents). + If `show_progress` is True, the total number of such combinations + that have been iterated over is displayed. It will try to use tqdm + if available for advanced features (e.g. time estimation). + Otherwise, it will fallback to a simple output of progress. + Default: False + Returns: - self influence scores (Tensor): 1-d Tensor containing self influence - scores for all examples in training dataset - `influence_src_dataset`. + self_influence_scores (Tensor): This is a 1D tensor containing the self + influence scores of all examples in `inputs_dataset`, regardless of + whether it represents a single batch or a `DataLoader` that yields + batches. """ warnings.warn( ( @@ -883,7 +1003,7 @@ def _self_influence(self): "`TracInCPFastRandProj`needed. Further considering the fact that " "random projections results only in approximate self influence " "scores, there is no reason to use `TracInCPFastRandProj` when " - "calculating self-influence scores." + "calculating self influence scores." ) ) raise NotImplementedError @@ -903,7 +1023,7 @@ def influence( # type: ignore[override] - influence score mode: This mode is used if `inputs` is not None, and `k` is None. This mode computes the influence score of every example in - training dataset `influence_src_dataset` on every example in the test + training dataset `train_dataset` on every example in the test batch represented by `inputs` and `targets`. 
- k-most influential mode: This mode is used if `inputs` is not None, and @@ -911,7 +1031,7 @@ def influence( # type: ignore[override] opponents of every example in the test batch represented by `inputs` and `targets`. In particular, for each test example in the test batch, this mode computes its proponents (resp. opponents), which are the - indices in the training dataset `influence_src_dataset` of the training + indices in the training dataset `train_dataset` of the training examples with the `k` highest (resp. lowest) influence scores on the test example. Proponents are computed if `proponents` is True. Otherwise, opponents are computed. For each test example, this method @@ -927,12 +1047,12 @@ def influence( # type: ignore[override] will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential mode. If the argument `unpack_inputs` is False, the - assumption is that `self.model(inputs)` produces the predictions + assumption is that `model(inputs)` produces the predictions for a batch, and `inputs` can be of any type. Otherwise if the argument `unpack_inputs` is True, the assumption is that - `self.model(*inputs)` produces the predictions for a batch, and + `model(*inputs)` produces the predictions for a batch, and `inputs` will need to be a tuple. In other words, `inputs` will be - unpacked as an argument when passing to `self.model`. + unpacked as an argument when passing to `model`. Default: None targets (tensor): The labels corresponding to the batch `inputs`. 
This method is designed to be applied for a loss function, so `targets` @@ -957,24 +1077,24 @@ def influence( # type: ignore[override] - influence score mode: if this mode is run (`inputs is not None, `k` is None), returns a 2D tensor `influence_scores` of shape - `(input_size, influence_src_dataset_size)`, where `input_size` is + `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and - `influence_src_dataset_size` is the number of examples in - training dataset `influence_src_dataset`. In other words, + `train_dataset_size` is the number of examples in + training dataset `train_dataset`. In other words, `influence_scores[i][j]` is the influence score of the `j`-th - example in `influence_src_dataset` on the `i`-th example in the + example in `train_dataset` on the `i`-th example in the test batch. - k-most influential mode: if this mode is run (`inputs` is not None, `k` is an int), returns a namedtuple `(indices, influence_scores)`. `indices` is a 2D tensor of shape `(input_size, k)`, where `input_size` is the number of examples in the test batch. If computing proponents (resp. opponents), `indices[i][j]` is the - index in training dataset `influence_src_dataset` of the example + index in training dataset `train_dataset` of the example with the `j`-th highest (resp. lowest) influence score (out of the - examples in `influence_src_dataset`) on the `i`-th example in the + examples in `train_dataset`) on the `i`-th example in the test batch. `influence_scores` contains the corresponding influence scores. In particular, `influence_scores[i][j]` is the influence - score of example `indices[i][j]` in `influence_src_dataset` on + score of example `indices[i][j]` in `train_dataset` on example `i` in the test batch represented by `inputs` and `targets`. 
""" @@ -990,7 +1110,7 @@ def influence( # type: ignore[override] _inputs = _format_inputs(inputs, unpack_inputs) if inputs is None: - return self._self_influence() + return self.self_influence(self.train_dataloader) elif k is None: return self._influence(_inputs, targets) else: @@ -1014,7 +1134,7 @@ def _set_projections_tracincp_fast_rand_proj( dataloader (DataLoader): determining the projection requires knowing the dimensionality of the last layer's parameters (`jacobian_dim` below) and its input (`layer_input_dim` below). These are - determined by passing a batch to `self.model`. `dataloader` + determined by passing a batch to `model`. `dataloader` provides that batch. Returns: @@ -1096,7 +1216,7 @@ def _process_src_intermediate_quantities_tracincp_fast_rand_proj( Args: src_intermediate_quantities (tensor): the output of the `_get_intermediate_quantities_tracin_fast_rand_proj` function when - applied to training dataset `influence_src_dataset`. This + applied to training dataset `train_dataset`. This output is the vector representation of all training examples. The dot product between the representation of a training example and the representation of a test example gives the influence score @@ -1143,6 +1263,8 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( the variable d in the top of page 15 of the TracIn paper: https://arxiv.org/pdf/2002.08484.pdf. 
""" + # for each checkpoint, this stores a list of projections for a batch + # each element in this list will be of shape (batch_size, projection_dim) checkpoint_projections: List[Any] = [[] for _ in self.checkpoints] if projection_quantities is None: diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index b86ddf9f93..d6f1c99f20 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -313,3 +313,15 @@ def __getitem__(self, i: int) -> Any: def __len__(self) -> int: return len(self._l) + + +def _format_inputs_dataset(inputs_dataset: Union[Tuple[Any, ...], DataLoader]): + # if `inputs_dataset` is not a `DataLoader`, turn it into one. + # `_DatasetFromList` turns a list into a `Dataset` where `__getitem__` + # returns an element in the list, and using it to construct a `DataLoader` + # with `batch_size=None` gives a `DataLoader` that yields a single batch. + if not isinstance(inputs_dataset, DataLoader): + inputs_dataset = DataLoader( + _DatasetFromList([inputs_dataset]), shuffle=False, batch_size=None + ) + return inputs_dataset diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index 60f0be2678..9448982a58 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -12,6 +12,7 @@ DataInfluenceConstructor, get_random_model_and_data, ) +from torch.utils.data import DataLoader class TestTracInSelfInfluence(BaseTest): @@ -33,7 +34,7 @@ class TestTracInSelfInfluence(BaseTest): ("mean", DataInfluenceConstructor(TracInCPFast)), ] ], - name_func=build_test_name_func(args_to_skip=["reduction"]), + name_func=build_test_name_func(), ) def test_tracin_self_influence( self, reduction: str, tracin_constructor: Callable, unpack_inputs: bool @@ -73,3 +74,70 @@ def test_tracin_self_influence( delta=0.01, mode="max", ) + + @parameterized.expand( + [ + (reduction, constructor, 
unpack_inputs) + for unpack_inputs in [True, False] + for (reduction, constructor) in [ + ("none", DataInfluenceConstructor(TracInCP)), + ( + "sum", + DataInfluenceConstructor( + TracInCP, + sample_wise_grads_per_batch=True, + ), + ), + ("sum", DataInfluenceConstructor(TracInCPFast)), + ("mean", DataInfluenceConstructor(TracInCPFast)), + ] + ], + name_func=build_test_name_func(), + ) + def test_tracin_self_influence_dataloader_vs_single_batch( + self, reduction: str, tracin_constructor: Callable, unpack_inputs: bool + ) -> None: + # tests that the result of calling the public method `self_influence` for a + # DataLoader of batches is the same as when the batches are collated into a + # single batch + with tempfile.TemporaryDirectory() as tmpdir: + ( + net, + train_dataset, + ) = get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=False) + + # create a single batch representing the entire dataset + single_batch = next( + iter(DataLoader(train_dataset, batch_size=len(train_dataset))) + ) + + # create a dataloader that yields batches from the dataset + dataloader = DataLoader(train_dataset, batch_size=5) + + # create tracin instance + criterion = nn.MSELoss(reduction=reduction) + batch_size = 5 + tracin = tracin_constructor( + net, + train_dataset, + tmpdir, + batch_size, + criterion, + ) + + # compute self influence using `self_influence` when passing in a single + # batch + single_batch_self_influence = tracin.self_influence(single_batch) + + # compute self influence using `self_influence` when passing in a + # dataloader with the same examples + dataloader_self_influence = tracin.self_influence(dataloader) + + # the two self influences should be equal + assertTensorAlmostEqual( + self, + single_batch_self_influence, + dataloader_self_influence, + delta=0.01, # due to numerical issues, we can't set this to 0.0 + mode="max", + ) diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py index 
5b35352880..17b9065458 100644 --- a/tests/influence/_core/test_tracin_show_progress.py +++ b/tests/influence/_core/test_tracin_show_progress.py @@ -49,115 +49,148 @@ class TestTracInShowProgress(BaseTest): ], name_func=build_test_name_func(args_to_skip=["reduction"]), ) - @unittest.mock.patch("sys.stderr", new_callable=io.StringIO) def test_tracin_show_progress( self, reduction: str, tracin_constructor: Callable, mode: str, - mock_stderr, ) -> None: - with tempfile.TemporaryDirectory() as tmpdir: + with unittest.mock.patch("sys.stderr", new_callable=io.StringIO) as mock_stderr: - batch_size = 5 + with tempfile.TemporaryDirectory() as tmpdir: - ( - net, - train_dataset, - test_samples, - test_labels, - ) = get_random_model_and_data( - tmpdir, unpack_inputs=False, return_test_data=True - ) + batch_size = 5 - self.assertTrue(isinstance(reduction, str)) - criterion = nn.MSELoss(reduction=reduction) + ( + net, + train_dataset, + test_samples, + test_labels, + ) = get_random_model_and_data( + tmpdir, unpack_inputs=False, return_test_data=True + ) - self.assertTrue(callable(tracin_constructor)) - tracin = tracin_constructor( - net, - train_dataset, - tmpdir, - batch_size, - criterion, - ) + self.assertTrue(isinstance(reduction, str)) + criterion = nn.MSELoss(reduction=reduction) - if mode == "self influence": - tracin.influence(show_progress=True) - output = mock_stderr.getvalue() - self.assertTrue( - ( - ( - f"Using {tracin.get_name()} to compute self influence " - "for training batches: 100%" - ) - in output - ), - f"Error progress output: {repr(output)}", + self.assertTrue(callable(tracin_constructor)) + tracin = tracin_constructor( + net, + train_dataset, + tmpdir, + batch_size, + criterion, ) - elif mode == "influence": - tracin.influence( - test_samples, - test_labels, - k=None, - show_progress=True, - ) - output = mock_stderr.getvalue() - self.assertTrue( - ( - ( - f"Using {tracin.get_name()} to compute influence " - "for training batches: 100%" + if mode == "self 
influence": + + # For self influence, displaying progress involves nested progress + # bars, which are not currently supported by the backup + # `SimpleProgress` that is used if `tqdm` is not installed. + # Therefore, we skip the test in this case. + # TODO: support nested progress bars for `SimpleProgress` + try: + import tqdm # noqa + except ModuleNotFoundError: + raise unittest.SkipTest( + ( + "Skipping self influence progress bar tests for " + f"{tracin.get_name()}, because proper displaying " + "requires the tqdm module, which is not installed." + ) ) - in output - ), - f"Error progress output: {repr(output)}", - ) - elif mode == "k-most": - tracin.influence( - test_samples, - test_labels, - k=2, - proponents=True, - show_progress=True, - ) - output = mock_stderr.getvalue() - self.assertTrue( - ( + tracin.influence(show_progress=True) + output = mock_stderr.getvalue() + # We are showing nested progress bars for the `self_influence` + # method, with the outer progress bar over checkpoints, and + # the inner progress bar over batches. First, we check that + # the outer progress bar reaches 100% once + self.assertEqual( + output.count( + ( + f"Using {tracin.get_name()} to compute self influence. " + "Processing checkpoint: 100%" + ) + ), + 1, + f"Error in progress of batches with output: {repr(output)}", + ) + # Second, we check that the inner progress bar reaches 100% + # once for each checkpoint in `tracin.checkpoints` + self.assertEqual( + output.count( + ( + f"Using {tracin.get_name()} to compute self influence. " + "Processing batch: 100%" + ) + ), + len(tracin.checkpoints), + f"Error in progress of checkpoints with output: {repr(output)}", + ) + elif mode == "influence": + + tracin.influence( + test_samples, + test_labels, + k=None, + show_progress=True, + ) + output = mock_stderr.getvalue() + self.assertTrue( ( - f"Using {tracin.get_name()} to perform computation for " - "getting proponents. 
Processing training batches: 100%" - ) - in output - ), - f"Error progress output: {repr(output)}", - ) - mock_stderr.seek(0) - mock_stderr.truncate(0) + ( + f"Using {tracin.get_name()} to compute influence " + "for training batches: 100%" + ) + in output + ), + f"Error progress output: {repr(output)}", + ) + elif mode == "k-most": - tracin.influence( - test_samples, - test_labels, - k=2, - proponents=False, - show_progress=True, - ) - output = mock_stderr.getvalue() - self.assertTrue( - ( + tracin.influence( + test_samples, + test_labels, + k=2, + proponents=True, + show_progress=True, + ) + output = mock_stderr.getvalue() + self.assertTrue( ( - f"Using {tracin.get_name()} to perform computation for " - "getting opponents. Processing training batches: 100%" - ) - in output - ), - f"Error progress output: {repr(output)}", - ) - else: - raise Exception("unknown test mode") + ( + f"Using {tracin.get_name()} to perform computation for " + "getting proponents. Processing training batches: 100%" + ) + in output + ), + f"Error progress output: {repr(output)}", + ) + mock_stderr.seek(0) + mock_stderr.truncate(0) - mock_stderr.seek(0) - mock_stderr.truncate(0) + tracin.influence( + test_samples, + test_labels, + k=2, + proponents=False, + show_progress=True, + ) + output = mock_stderr.getvalue() + self.assertTrue( + ( + ( + f"Using {tracin.get_name()} to perform computation for " + "getting opponents. Processing training batches: 100%" + ) + in output + ), + f"Error progress output: {repr(output)}", + ) + else: + raise Exception("unknown test mode") + + mock_stderr.seek(0) + mock_stderr.truncate(0) From a08883f1ba3abc96ace06b11883893419b187d09 Mon Sep 17 00:00:00 2001 From: Fulton Wang Date: Mon, 1 Aug 2022 09:30:43 -0700 Subject: [PATCH 111/174] allow self influence iteration options (#1002) Summary: Pull Request resolved: https://github.com/pytorch/captum/pull/1002 - For self influence computation, there needs to be an iteration over both checkpoints as well as batches. 
This diff adds a `by_checkpoints` option. If true, the outer iteration is over checkpoints. If false, the outer iteration is over batches. Because self influence computation can be called through the `influence` and `self_influence` methods, this option is added to both methods. Because only `TracInCP` and `TracInCPFast` should be used for self influence computation, only those classes are changed. - To implement this option, the old `self_influence` method, which had the outer iteration over checkpoints, is renamed to be a private `_self_influence_by_checkpoints` method. A new `_self_influence_by_batches` method is added, which has an outer iteration over batches, and re-uses the `_self_influence_by_checkpoints` method to compute self influence scores for a single batch (this method can accept both a single batch, as well as a dataloader yielding batches). Because the logic of this method is the same for all classes, a helper method, `_self_influence_by_batches_helper`, is added to `captum.influence._utils.common`. Finally, the new `self_influence` method simply chooses whether to call `_self_influence_by_checkpoints` or `_self_influence_by_batches`. - Documentation describing the two options for `by_checkpoints` is added to the `self_influence` and `influence` methods. - `test_tracin_show_progress` now differentiates between 2 modes: "self influence by checkpoints" (the original test for progress bar when calculating self influence scores, which checks whether the outer progress bar over checkpoints and inner progress bars over batches both reach 100%), and the newly added mode "self influence by batches", which checks whether the progress bar over batches reaches 100%.
- `test_tracin_self_influence` now also checks whether computing self influence scores gives the same result regardless of whether `by_checkpoints` is True or False Reviewed By: NarineK Differential Revision: D37743920 fbshipit-source-id: ead1bbc86e8eac477768113b9939556d9b1c0de1 --- captum/influence/_core/tracincp.py | 102 +++++++++++--- .../_core/tracincp_fast_rand_proj.py | 104 ++++++++++++--- captum/influence/_utils/common.py | 94 +++++++++++++ .../_core/test_tracin_self_influence.py | 21 ++- .../_core/test_tracin_show_progress.py | 126 ++++++++++++------ 5 files changed, 372 insertions(+), 75 deletions(-) diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index 78fa32738f..15811e684b 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -30,6 +30,7 @@ _get_k_most_influential_helper, _gradient_dot_product, _load_flexible_state_dict, + _self_influence_by_batches_helper, ) from captum.log import log_usage from torch import Tensor @@ -475,7 +476,8 @@ def _influence_route_to_helpers( if inputs is None: return influence_instance.self_influence( - influence_instance.train_dataloader, show_progress + influence_instance.train_dataloader, + show_progress, ) elif k is None: return influence_instance._influence(_inputs, targets, show_progress) @@ -727,11 +729,9 @@ def influence( # type: ignore[override] requires "training dataset computations": computations for each batch in the training dataset `train_dataset`, which may take a long time. If `show_progress`is true, the progress of - "training dataset computations" will be displayed. In particular, - the number of batches for which computations have been performed - will be displayed. It will try to use tqdm if available for - advanced features (e.g. time estimation). Otherwise, it will - fallback to a simple output of progress. + "training dataset computations" will be displayed. It will try to + use tqdm if available for advanced features (e.g. 
time estimation). + Otherwise, it will fallback to a simple output of progress. Default: False Returns: @@ -926,7 +926,7 @@ def _get_k_most_influential( ( f"Using {self.get_name()} to perform computation for " f'getting {"proponents" if proponents else "opponents"}. ' - "Processing training batches: 100%" + "Processing training batches" ) ) ) @@ -943,7 +943,7 @@ def _get_k_most_influential( ) ) - def self_influence( + def _self_influence_by_checkpoints( self, inputs_dataset: Union[Tuple[Any, ...], DataLoader], show_progress: bool = False, @@ -957,7 +957,11 @@ def self_influence( will call `model` on that single batch, and if `inputs_dataset` yields batches, this will call `model` on each batch that is yielded. Therefore, please ensure that for both cases, the batch(es) that `model` is called - with are not too large, so that there will not be an out-of-memory error. + with are not too large, so that there will not be an out-of-memory error. This + implementation performs an outer iteration over checkpoints, and an inner + iteration over all batches that `inputs_dataset` represents. The pros of this + implementation are that the checkpoints do not need to be loaded too many + times. Args: batches (Tuple, or DataLoader): Either a single tuple of any, or a @@ -976,13 +980,10 @@ def self_influence( displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) in an outer loop, and iterate over all batches that - `inputs_dataset` represents in an inner loop. Therefore, the - total number of (checkpoint, batch) combinations that need to be - iterated over is - (# of checkpoints x # of batches that `inputs_dataset` represents). - If `show_progress` is True, the total progress of both the outer - iteration over checkpoints and the inner iteration over batches is - displayed. It will try to use tqdm if available for advanced + `inputs_dataset` represents in an inner loop. 
Thus if + `show_progress` is True, the progress of both the outer + iteration and the inner iterations will be displayed. To show + progress, it will try to use tqdm if available for advanced features (e.g. time estimation). Otherwise, it will fallback to a simple output of progress. Default: False @@ -1097,6 +1098,75 @@ def get_checkpoint_contribution(checkpoint): return batches_self_tracin_scores + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + outer_loop_by_checkpoints: bool = False, + ) -> Tensor: + """ + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. Therefore, + the computed self influence scores are *not* for the examples in training + dataset `train_dataset` (unlike when computing self influence scores using the + `influence` method). Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + Internally, this computation requires iterating both over the batches in + `inputs_dataset`, as well as different model checkpoints. There are two ways + this iteration can be done. If `outer_loop_by_checkpoints` is False, the outer + iteration will be over batches, and the inner iteration will be over + checkpoints. This has the pro that displaying the progress of the computation + is more intuitive, involving displaying the number of batches for which self + influence scores have been computed. If `outer_loop_by_checkpoints` is True, + the outer iteration will be over checkpoints, and the inner iteration will be + over batches. This has the pro that the checkpoints do not need to be loaded + for each batch. 
For large models, loading checkpoints can be time-intensive. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. If + `show_progress`is true, the progress of this computation will be + displayed. In more detail, if `outer_loop_by_checkpoints` is False, + this computation will iterate over all batches in an outer loop. + Thus if `show_progress` is True, the number of batches for which + self influence scores have been computed will be displayed. If + `outer_loop_by_checkpoints` is True, this computation will iterate + over all checkpoints (provided as the `checkpoints` initialization + argument) in an outer loop, and iterate over all batches that + `inputs_dataset` represents in an inner loop. Thus if + `show_progress` is True, the progress of both the outer + iteration and the inner iterations will be displayed. To show + progress, it will try to use tqdm if available for advanced + features (e.g. time estimation). Otherwise, it will fallback to a + simple output of progress. + Default: False + outer_loop_by_checkpoints (bool, optional): If performing an outer + iteration over checkpoints; see method description for more + details. 
+ Default: False + """ + if outer_loop_by_checkpoints: + return self._self_influence_by_checkpoints(inputs_dataset, show_progress) + return _self_influence_by_batches_helper( + self._self_influence_by_checkpoints, + self.get_name(), + inputs_dataset, + show_progress, + ) + def _basic_computation_tracincp( self, inputs: Tuple[Any, ...], diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index 71fe3b45a0..f42dbd1527 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -17,6 +17,7 @@ _get_k_most_influential_helper, _jacobian_loss_wrt_inputs, _load_flexible_state_dict, + _self_influence_by_batches_helper, _tensor_batch_dot, ) from captum.influence._utils.nearest_neighbors import ( @@ -263,11 +264,9 @@ def influence( # type: ignore[override] requires "training dataset computations": computations for each batch in the training dataset `train_dataset`, which may take a long time. If `show_progress`is true, the progress of - "training dataset computations" will be displayed. In particular, - the number of batches for which computations have been performed - will be displayed. It will try to use tqdm if available for - advanced features (e.g. time estimation). Otherwise, it will - fallback to a simple output of progress. + "training dataset computations" will be displayed. It will try to + use tqdm if available for advanced features (e.g. time estimation). + Otherwise, it will fallback to a simple output of progress. Default: False Returns: @@ -466,7 +465,7 @@ def _get_k_most_influential( # type: ignore[override] ( f"Using {self.get_name()} to perform computation for " f'getting {"proponents" if proponents else "opponents"}. 
' - "Processing training batches: 100%" + "Processing training batches" ) ) ) @@ -483,7 +482,7 @@ def _get_k_most_influential( # type: ignore[override] ) ) - def self_influence( + def _self_influence_by_checkpoints( self, inputs_dataset: Union[Tuple[Any, ...], DataLoader], show_progress: bool = False, @@ -497,7 +496,11 @@ def self_influence( will call `model` on that single batch, and if `inputs_dataset` yields batches, this will call `model` on each batch that is yielded. Therefore, please ensure that for both cases, the batch(es) that `model` is called - with are not too large, so that there will not be an out-of-memory error. + with are not too large, so that there will not be an out-of-memory error. This + implementation performs an outer iteration over checkpoints, and an inner + iteration over all batches that `inputs_dataset` represents. The pros of this + implementation are that the checkpoints do not need to be loaded too many + times. Args: batches (Tuple, or DataLoader): Either a single tuple of any, or a @@ -516,13 +519,10 @@ def self_influence( displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) in an outer loop, and iterate over all batches that - `inputs_dataset` represents in an inner loop. Therefore, the - total number of (checkpoint, batch) combinations that need to be - iterated over is - (# of checkpoints x # of batches that `inputs_dataset` represents). - If `show_progress` is True, the total progress of both the outer - iteration over checkpoints and the inner iteration over batches is - displayed. It will try to use tqdm if available for advanced + `inputs_dataset` represents in an inner loop. Thus if + `show_progress` is True, the progress of both the outer + iteration and the inner iterations will be displayed. To show + progress, it will try to use tqdm if available for advanced features (e.g. time estimation). 
Otherwise, it will fallback to a simple output of progress. Default: False @@ -619,6 +619,75 @@ def get_checkpoint_contribution(checkpoint): return batches_self_tracin_scores + def self_influence( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, + outer_loop_by_checkpoints: bool = False, + ) -> Tensor: + """ + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. Therefore, + the computed self influence scores are *not* for the examples in training + dataset `train_dataset` (unlike when computing self influence scores using the + `influence` method). Note that if `inputs_dataset` is a single batch, this + will call `model` on that single batch, and if `inputs_dataset` yields + batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. + Internally, this computation requires iterating both over the batches in + `inputs_dataset`, as well as different model checkpoints. There are two ways + this iteration can be done. If `outer_loop_by_checkpoints` is False, the outer + iteration will be over batches, and the inner iteration will be over + checkpoints. This has the pro that displaying the progress of the computation + is more intuitive, involving displaying the number of batches for which self + influence scores have been computed. If `outer_loop_by_checkpoints` is True, + the outer iteration will be over checkpoints, and the inner iteration will be + over batches. This has the pro that the checkpoints do not need to be loaded + for each batch. For large models, loading checkpoints can be time-intensive. + + Args: + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. 
In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. If + `show_progress`is true, the progress of this computation will be + displayed. In more detail, if `outer_loop_by_checkpoints` is False, + this computation will iterate over all batches in an outer loop. + Thus if `show_progress` is True, the number of batches for which + self influence scores have been computed will be displayed. If + `outer_loop_by_checkpoints` is True, this computation will iterate + over all checkpoints (provided as the `checkpoints` initialization + argument) in an outer loop, and iterate over all batches that + `inputs_dataset` represents in an inner loop. Thus if + `show_progress` is True, the progress of both the outer + iteration and the inner iterations will be displayed. To show + progress, it will try to use tqdm if available for advanced + features (e.g. time estimation). Otherwise, it will fallback to a + simple output of progress. + Default: False + outer_loop_by_checkpoints (bool, optional): If performing an outer + iteration over checkpoints; see method description for more + details. 
+ Default: False + """ + if outer_loop_by_checkpoints: + return self._self_influence_by_checkpoints(inputs_dataset, show_progress) + return _self_influence_by_batches_helper( + self._self_influence_by_checkpoints, + self.get_name(), + inputs_dataset, + show_progress, + ) + def _basic_computation_tracincp_fast( influence_instance: TracInCPFast, @@ -946,6 +1015,7 @@ def self_influence( self, inputs_dataset: Union[Tuple[Any, ...], DataLoader], show_progress: bool = False, + outer_loop_by_checkpoints: bool = False, ) -> Tensor: """ NOT IMPLEMENTED - no need to implement `TracInCPFastRandProj.self_influence`, @@ -985,6 +1055,10 @@ def self_influence( if available for advanced features (e.g. time estimation). Otherwise, it will fallback to a simple output of progress. Default: False + outer_loop_by_checkpoints (bool, optional): If performing an outer + iteration over checkpoints; see method description for more + details. + Default: False Returns: self_influence_scores (Tensor): This is a 1D tensor containing the self diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index d6f1c99f20..131f8964b8 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import warnings from typing import Any, Callable, List, Optional, Tuple, Union import torch @@ -325,3 +326,96 @@ def _format_inputs_dataset(inputs_dataset: Union[Tuple[Any, ...], DataLoader]): _DatasetFromList([inputs_dataset]), shuffle=False, batch_size=None ) return inputs_dataset + + +def _self_influence_by_batches_helper( + self_influence_batch_fn: Callable, + instance_name: str, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + show_progress: bool = False, +) -> Tensor: + """ + Computes self influence scores for the examples in `inputs_dataset`, which is + either a single batch or a Pytorch `DataLoader` that yields batches. 
The self + influence scores for a single batch are computed using the + `self_influence_batch_fn` input. Note that if `inputs_dataset` is a single batch, + this will call `model` on that single batch, where `model` is the model used to + compute self influence scores by `self_influence_batch_fn`, and if `inputs_dataset` + yields batches, this will call `model` on each batch that is yielded. Therefore, + please ensure that for both cases, the batch(es) that `model` is called + with are not too large, so that there will not be an out-of-memory error. This + implementation performs an outer iteration over all batches that + `inputs_dataset` represents, and an inner iteration over checkpoints. The pros + of this implementation are that showing the progress of the computation is + straightforward. + + Args: + self_influence_batch_fn (Callable): This is the function that computes self + influence scores for a single batch. + instance_name (str): This is the name of the implementation class that + `self_influence_batch_fn` is a method of. This is used for displaying + warning messages. + batches (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, + and `batch[-1]` are the labels, if any. This is the same + assumption made for each batch yielded by training dataset + `train_dataset`. Please see documentation for the + `train_dataset` argument to `TracInCP.__init__` for + more details on the assumed structure of a batch. + show_progress (bool, optional): Computation of self influence scores can + take a long time if `inputs_dataset` represents many examples. If + `show_progress`is true, the progress of this computation will be + displayed. 
In particular, the number of batches for which self + influence scores have been computed will be displayed. It will try + to use tqdm if available for advanced features (e.g. time + estimation). Otherwise, it will fallback to a simple output of + progress. + Default: False + + Returns: + self_influence_scores (Tensor): This is a 1D tensor containing the self + influence scores of all examples in `inputs_dataset`, regardless of + whether it represents a single batch or a `DataLoader` that yields + batches. + """ + # If `inputs_dataset` is not a `DataLoader`, turn it into one. + inputs_dataset = _format_inputs_dataset(inputs_dataset) + + # If `show_progress` is true, create a progress bar that keeps track of how + # many batches have been processed + if show_progress: + # First, try to determine length of progress bar if possible, with a + # default of `None` + inputs_dataset_len = None + try: + inputs_dataset_len = len(inputs_dataset) + except TypeError: + warnings.warn( + "Unable to determine the number of batches in `inputs_dataset`. " + "Therefore, if showing the progress of the computation of self " + "influence scores, only the number of batches processed can be " + "displayed, and not the percentage completion of the computation, " + "nor any time estimates." + ) + # then create the progress bar + inputs_dataset = progress( + inputs_dataset, + desc=f"Using {instance_name} to compute self influence. Processing batch", + total=inputs_dataset_len, + ) + + # To compute self influence scores for each batch, we use + # `_self_influence_by_checkpoints`, which can accept a tuple representing a + # single batch as the `inputs_dataset` argument (as well as a DataLoader). + # Because we are already displaying progress in terms of number of batches + # processed in this method, we will not show progress for the call to + # `_self_influence_by_checkpoints`. 
+ return torch.cat( + [ + self_influence_batch_fn(batch, show_progress=False) + for batch in inputs_dataset + ] + ) diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index 9448982a58..0f327ce3fb 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -57,6 +57,7 @@ def test_tracin_self_influence( criterion, ) + # calculate influence scores, using the training data as the test batch train_scores = tracin.influence( train_dataset.samples, train_dataset.labels, @@ -65,8 +66,12 @@ def test_tracin_self_influence( ) # calculate self_tracin_scores - self_tracin_scores = tracin.influence() + self_tracin_scores = tracin.self_influence( + DataLoader(train_dataset, batch_size=batch_size), + outer_loop_by_checkpoints=False, + ) + # check that self_tracin scores equals the diagonal of influence scores assertTensorAlmostEqual( self, torch.diagonal(train_scores), @@ -75,6 +80,20 @@ def test_tracin_self_influence( mode="max", ) + # check that setting `outer_loop_by_checkpoints=False` and + # `outer_loop_by_checkpoints=True` gives the same self influence scores + self_tracin_scores_by_checkpoints = tracin.self_influence( + DataLoader(train_dataset, batch_size=batch_size), + outer_loop_by_checkpoints=True, + ) + assertTensorAlmostEqual( + self, + self_tracin_scores_by_checkpoints, + self_tracin_scores, + delta=0.01, + mode="max", + ) + @parameterized.expand( [ (reduction, constructor, unpack_inputs) diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py index 17b9065458..e940e2ed66 100644 --- a/tests/influence/_core/test_tracin_show_progress.py +++ b/tests/influence/_core/test_tracin_show_progress.py @@ -14,6 +14,7 @@ DataInfluenceConstructor, get_random_model_and_data, ) +from torch.utils.data import DataLoader class TestTracInShowProgress(BaseTest): @@ -28,6 +29,18 @@ class 
TestTracInShowProgress(BaseTest): in `TracInCPFastRandProj.__init__`). """ + def _check_error_msg_multiplicity(self, mock_stderr, msg, msg_multiplicity): + """ + checks that in `mock_stderr`, the error msg `msg` occurs `msg_multiplicity` + times + """ + output = mock_stderr.getvalue() + self.assertEqual( + output.count(msg), + msg_multiplicity, + f"Error in progress of batches with output: {repr(output)}", + ) + @parameterized.expand( [ ( @@ -45,7 +58,12 @@ class TestTracInShowProgress(BaseTest): DataInfluenceConstructor(TracInCPFast), ), ] - for mode in ["self influence", "influence", "k-most"] + for mode in [ + "self influence by checkpoints", + "self influence by batches", + "influence", + "k-most", + ] ], name_func=build_test_name_func(args_to_skip=["reduction"]), ) @@ -83,9 +101,13 @@ def test_tracin_show_progress( criterion, ) - if mode == "self influence": + if mode == "self influence by checkpoints": + # this tests progress for computing self influence scores, when + # `outer_loop_by_checkpoints` is True. In this case, we should see a + # single outer progress bar over checkpoints, and for every + # checkpoints, a separate progress bar over batches - # For self influence, displaying progress involves nested progress + # In this case, displaying progress involves nested progress # bars, which are not currently supported by the backup # `SimpleProgress` that is used if `tqdm` is not installed. # Therefore, we skip the test in this case. @@ -101,33 +123,50 @@ def test_tracin_show_progress( ) ) - tracin.influence(show_progress=True) - output = mock_stderr.getvalue() + tracin.self_influence( + DataLoader(train_dataset, batch_size=batch_size), + show_progress=True, + outer_loop_by_checkpoints=True, + ) + # We are showing nested progress bars for the `self_influence` # method, with the outer progress bar over checkpoints, and # the inner progress bar over batches. 
First, we check that # the outer progress bar reaches 100% once - self.assertEqual( - output.count( - ( - f"Using {tracin.get_name()} to compute self influence. " - "Processing checkpoint: 100%" - ) + self._check_error_msg_multiplicity( + mock_stderr, + ( + f"Using {tracin.get_name()} to compute self influence. " + "Processing checkpoint: 100%" ), 1, - f"Error in progress of batches with output: {repr(output)}", ) # Second, we check that the inner progress bar reaches 100% # once for each checkpoint in `tracin.checkpoints` - self.assertEqual( - output.count( - ( - f"Using {tracin.get_name()} to compute self influence. " - "Processing batch: 100%" - ) + self._check_error_msg_multiplicity( + mock_stderr, + ( + f"Using {tracin.get_name()} to compute self influence. " + "Processing batch: 100%" ), len(tracin.checkpoints), - f"Error in progress of checkpoints with output: {repr(output)}", + ) + elif mode == "self influence by batches": + # This tests progress for computing self influence scores, when + # `outer_loop_by_checkpoints` is False. In this case, we should see + # a single outer progress bar over batches. + tracin.self_influence( + DataLoader(train_dataset, batch_size=batch_size), + show_progress=True, + outer_loop_by_checkpoints=False, + ) + self._check_error_msg_multiplicity( + mock_stderr, + ( + f"Using {tracin.get_name()} to compute self influence. 
" + "Processing batch: 100%" + ), + 1, ) elif mode == "influence": @@ -137,16 +176,15 @@ def test_tracin_show_progress( k=None, show_progress=True, ) - output = mock_stderr.getvalue() - self.assertTrue( + # Since the computation iterates once over training batches, we + # check that the progress bar over batches reaches 100% once + self._check_error_msg_multiplicity( + mock_stderr, ( - ( - f"Using {tracin.get_name()} to compute influence " - "for training batches: 100%" - ) - in output + f"Using {tracin.get_name()} to compute influence " + "for training batches: 100%" ), - f"Error progress output: {repr(output)}", + 1, ) elif mode == "k-most": @@ -157,16 +195,17 @@ def test_tracin_show_progress( proponents=True, show_progress=True, ) - output = mock_stderr.getvalue() - self.assertTrue( + + # Since the computation iterates once over training batches, we + # check that the progress bar over batches reaches 100% once, and + # that the message is specific for finding proponents. + self._check_error_msg_multiplicity( + mock_stderr, ( - ( - f"Using {tracin.get_name()} to perform computation for " - "getting proponents. Processing training batches: 100%" - ) - in output + f"Using {tracin.get_name()} to perform computation for " + "getting proponents. Processing training batches: 100%" ), - f"Error progress output: {repr(output)}", + 1, ) mock_stderr.seek(0) mock_stderr.truncate(0) @@ -178,16 +217,17 @@ def test_tracin_show_progress( proponents=False, show_progress=True, ) - output = mock_stderr.getvalue() - self.assertTrue( + + # Since the computation iterates once over training batches, we + # check that the progress bar over batches reaches 100% once, and + # that the message is specific for finding opponents. + self._check_error_msg_multiplicity( + mock_stderr, ( - ( - f"Using {tracin.get_name()} to perform computation for " - "getting opponents. 
Processing training batches: 100%" - ) - in output + f"Using {tracin.get_name()} to perform computation for " + "getting opponents. Processing training batches: 100%" ), - f"Error progress output: {repr(output)}", + 1, ) else: raise Exception("unknown test mode") From fb6db3bf2380b6a97ba6b7f8dc548578c5b30c6e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 1 Aug 2022 13:45:24 -0600 Subject: [PATCH 112/174] Fix branch --- .../models/_image/clip_resnet50x4_image.py | 96 +++++++++++-------- .../models/_image/clip_resnet50x4_text.py | 49 ++++++---- .../models/test_clip_resnet50x4_image.py | 39 +++++++- 3 files changed, 120 insertions(+), 64 deletions(-) diff --git a/captum/optim/models/_image/clip_resnet50x4_image.py b/captum/optim/models/_image/clip_resnet50x4_image.py index 4fc86a8880..14c3cc4ed0 100644 --- a/captum/optim/models/_image/clip_resnet50x4_image.py +++ b/captum/optim/models/_image/clip_resnet50x4_image.py @@ -23,8 +23,12 @@ def clip_resnet50x4_image( This model can be combined with the CLIP ResNet 50x4 Text model to create the full CLIP ResNet 50x4 model. - Note that model inputs are expected to have a shape of: [B, 3, 288, 288] or - [3, 288, 288]. + Note that the model was trained on inputs with a shape of: [B, 3, 288, 288]. + + Example:: + + >>> model = opt.models.clip_resnet50x4_image(pretrained=True) + >>> output = model(torch.zeros(1, 3, 288, 288)) See here for more details: https://github.com/openai/CLIP @@ -32,25 +36,30 @@ def clip_resnet50x4_image( Args: - pretrained (bool, optional): If True, returns a pre-trained model. - Default: False - progress (bool, optional): If True, displays a progress bar of the download to - stderr - Default: True + pretrained (bool, optional): If ``True``, returns a pre-trained model. + Default: ``False`` + progress (bool, optional): If ``True``, displays a progress bar of the download + to stderr. + Default: ``True`` model_path (str, optional): Optional path for the model file. 
- Default: None - replace_relus_with_redirectedrelu (bool, optional): If True, return pretrained - model with Redirected ReLU in place of ReLU layers. - Default: *True* when pretrained is True otherwise *False* - use_linear_modules_only (bool, optional): If True, return model + Default: ``None`` + replace_relus_with_redirectedrelu (bool, optional): If ``True``, return + pretrained model with Redirected ReLU in place of ReLU layers. + Default: *``True``* when ``pretrained`` is ``True`` otherwise *``False``* + use_linear_modules_only (bool, optional): If ``True``, return model with all nonlinear layers replaced with linear equivalents. - Default: False - transform_input (bool, optional): If True, preprocesses the input according to - the method with which it was trained. - Default: *True* when pretrained is True otherwise *False* + Default: ``False`` + transform_input (bool, optional): If ``True``, preprocesses the input according + to the method with which it was trained. + Default: *``True``* when ``pretrained`` is ``True`` otherwise *``False``* + use_attnpool (bool, optional): Whether or not to use the final + ``AttentionPool2d`` layer in the forward function. If set to ``True``, + model inputs are required to have a shape of: [B, 3, 288, 288] or + [3, 288, 288]. + Default: ``False`` Returns: - **CLIP_ResNet50x4Image** (CLIP_ResNet50x4Image): A CLIP ResNet 50x4 model's + model (CLIP_ResNet50x4Image): An instance of a CLIP ResNet 50x4 model's image portion. 
""" if pretrained: @@ -60,6 +69,8 @@ def clip_resnet50x4_image( kwargs["replace_relus_with_redirectedrelu"] = True if "use_linear_modules_only" not in kwargs: kwargs["use_linear_modules_only"] = False + if "use_attnpool" not in kwargs: + kwargs["use_attnpool"] = False model = CLIP_ResNet50x4Image(**kwargs) @@ -81,26 +92,32 @@ class CLIP_ResNet50x4Image(nn.Module): Visual Models From Natural Language Supervision': https://arxiv.org/abs/2103.00020 """ - __constants__ = ["transform_input"] + __constants__ = ["transform_input", "use_attnpool"] def __init__( self, transform_input: bool = False, replace_relus_with_redirectedrelu: bool = False, use_linear_modules_only: bool = False, + use_attnpool: bool = True, ) -> None: """ Args: - replace_relus_with_redirectedrelu (bool, optional): If True, return + replace_relus_with_redirectedrelu (bool, optional): If ``True``, return model with Redirected ReLU in place of ReLU layers. Default: False - use_linear_modules_only (bool, optional): If True, return model with + use_linear_modules_only (bool, optional): If ``True``, return model with all nonlinear layers replaced with linear equivalents. - Default: False - transform_input (bool, optional): If True, preprocesses the input according - to the method with which it was trained on. - Default: False + Default: ``False`` + transform_input (bool, optional): If ``True``, preprocesses the input + according to the method with which it was trained on. + Default: ``False`` + use_attnpool (bool, optional): Whether or not to use the final + ``AttentionPool2d`` layer in the forward function. If set to ``True``, + model inputs are required to have a shape of: [B, 3, 288, 288] or + [3, 288, 288]. 
+ Default: ``True`` """ super().__init__() if use_linear_modules_only: @@ -112,6 +129,7 @@ def __init__( activ = nn.ReLU self.transform_input = transform_input + self.use_attnpool = use_attnpool # Stem layers self.conv1 = nn.Conv2d(3, 40, kernel_size=3, stride=2, padding=1, bias=False) @@ -149,21 +167,21 @@ def _build_layer( inplanes (int, optional): The number of input channels / features to use for the first layer. - Default: 80 + Default: ``80`` planes (int, optional): The number of output channels / features to use for the first layer. This variable is then multiplied by 4 to get the number of input channels / features to use for the subsequent layers. - Default: 80 + Default: ``80`` blocks (int, optional): The number of Bottleneck layers to create. - Default: 4 + Default: ``4`` stride (int, optional): The stride value to use for the Bottleneck layers. - Default: 1 + Default: ``1`` activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. - Default: nn.ReLU + Default: ``nn.ReLU`` Returns: - residual_layer (nn.Sequential): A full residual layer. + residual_layer (nn.Sequential): A full residual layer instance. """ layers = [Bottleneck(inplanes, planes, stride, activ=activ)] for _ in range(blocks - 1): @@ -216,7 +234,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = self.layer4(x) # Attention Pooling - x = self.attnpool(x) + if self.use_attnpool: + x = self.attnpool(x) return x @@ -233,15 +252,15 @@ def __init__( inplanes (int, optional): The number of input channels / features to use for the first layer. - Default: 80 + Default: ``80`` planes (int, optional): The number of output channels / features to use for the subsequent layers. - Default: 80 + Default: ``80`` stride (int, optional): The stride value to use for the Bottleneck layers. - Default: 1 + Default: ``1`` activ (type of nn.Module, optional): The nn.Module class type to use for activation layers. 
- Default: nn.ReLU + Default: ``nn.ReLU`` """ super().__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) @@ -304,14 +323,15 @@ def __init__( spacial_size (int, optional): The desired size to user for the positional embedding. - Default: 9 + Default: ``9`` in_features (int, optional): The desired input size for the nn.Linear layers. - Default: 2560 + Default: ``2560`` out_features (int, optional): The desired output size for the nn.Linear layers. + Default: ``640`` num_heads (int, optional): The number of heads to use. - Default: 40 + Default: ``40`` """ super().__init__() self.positional_embedding = nn.Parameter( diff --git a/captum/optim/models/_image/clip_resnet50x4_text.py b/captum/optim/models/_image/clip_resnet50x4_text.py index 66cb58ce69..8fdbcc5179 100644 --- a/captum/optim/models/_image/clip_resnet50x4_text.py +++ b/captum/optim/models/_image/clip_resnet50x4_text.py @@ -23,33 +23,40 @@ def clip_resnet50x4_text( This model can be combined with the CLIP ResNet 50x4 Image model to create the full CLIP ResNet 50x4 model. + Example:: + + >>> model = opt.models.clip_resnet50x4_text(pretrained=True) + >>> clip_tokenizer = opt.transforms.CLIPTokenizer(pretrained_merges=True) + >>> tokenized_input = clip_tokenizer("Some example text.") + >>> output = model(tokenized_input) + See here for more details: https://github.com/openai/CLIP https://github.com/mlfoundations/open_clip Args: - pretrained (bool, optional): If True, returns a pre-trained model. - Default: False - progress (bool, optional): If True, displays a progress bar of the download to - stderr - Default: True + pretrained (bool, optional): If ``True``, returns a pre-trained model. + Default: ``False`` + progress (bool, optional): If ``True``, displays a progress bar of the download + to stderr. + Default: ``True`` model_path (str, optional): Optional path for the model file. - Default: None + Default: ``None`` width (int, optional): The desired width size to use for the model. 
- Default: 640 + Default: ``640`` num_heads (int, optional): The number of heads to use for the model. - Default: 10 + Default: ``10`` num_residual_layers (int, optional): The number of residual layers to use for each residual attention block in the model. - Default: 12 + Default: ``12`` content_length (int, optional): The expected size of text inputs to the model. - Default: 77 + Default: ``77`` vocab_size (int, optional): The size of the vocab used to train the model. - Default: 49408 + Default: ``49408`` Returns: - **CLIP_ResNet50x4Text** (CLIP_ResNet50x4Text): A CLIP ResNet 50x4 model's text + model (CLIP_ResNet50x4Text): An instance of a CLIP ResNet 50x4 model's text portion. """ if pretrained: @@ -85,17 +92,17 @@ def __init__( Args: width (int, optional): The desired width size to use for the model. - Default: 640 + Default: ``640`` num_heads (int, optional): The num number of heads to use for the model. - Default: 10 + Default: ``10`` num_residual_layers (int, optional): The number of residual layers to use for each residual attention block. - Default: 12 + Default: ``12`` content_length (int, optional): The expected size of text inputs to the model. - Default: 77 + Default: ``77`` vocab_size (int, optional): The size of the vocab used to train the model. - Default: 49408 + Default: ``49408`` """ super().__init__() self.transformer = nn.Sequential( @@ -154,11 +161,11 @@ def __init__( Args: width (int, optional): The desired width size to use. - Default: 640 + Default: ``640`` num_heads (int, optional): The num number of heads to use. - Default: 10 - content_length (int, optional): The desired content_length to use. - Default: 77 + Default: ``10`` + content_length (int, optional): The desired ``content_length`` to use. 
+ Default: ``77`` """ super().__init__() self.attn = nn.MultiheadAttention(width, num_heads) diff --git a/tests/optim/models/test_clip_resnet50x4_image.py b/tests/optim/models/test_clip_resnet50x4_image.py index beb3d33595..ab5f22e52c 100644 --- a/tests/optim/models/test_clip_resnet50x4_image.py +++ b/tests/optim/models/test_clip_resnet50x4_image.py @@ -81,9 +81,10 @@ def test_clip_resnet50x4_image_load_and_forward(self) -> None: + " insufficient Torch version." ) x = torch.zeros(1, 3, 288, 288) - model = clip_resnet50x4_image(pretrained=True) + model = clip_resnet50x4_image(pretrained=True, use_attnpool=True) output = model(x) self.assertEqual(list(output.shape), [1, 640]) + self.assertTrue(model.use_attnpool) def test_untrained_clip_resnet50x4_image_load_and_forward(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): @@ -92,9 +93,10 @@ def test_untrained_clip_resnet50x4_image_load_and_forward(self) -> None: + " insufficient Torch version." ) x = torch.zeros(1, 3, 288, 288) - model = clip_resnet50x4_image(pretrained=False) + model = clip_resnet50x4_image(pretrained=False, use_attnpool=True) output = model(x) self.assertEqual(list(output.shape), [1, 640]) + self.assertTrue(model.use_attnpool) def test_clip_resnet50x4_image_warning(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): @@ -109,6 +111,30 @@ def test_clip_resnet50x4_image_warning(self) -> None: with self.assertWarns(UserWarning): _ = model._transform_input(x) + def test_clip_resnet50x4_image_use_attnpool_false(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping basic pretrained CLIP ResNet 50x4 Image use_attnpool" + + " forward due to insufficient Torch version." 
+ ) + x = torch.zeros(1, 3, 288, 288) + model = clip_resnet50x4_image(pretrained=True, use_attnpool=False) + output = model(x) + self.assertEqual(list(output.shape), [1, 2560, 9, 9]) + self.assertFalse(model.use_attnpool) + + def test_clip_resnet50x4_image_use_attnpool_false_size_128(self) -> None: + if version.parse(torch.__version__) <= version.parse("1.6.0"): + raise unittest.SkipTest( + "Skipping basic pretrained CLIP ResNet 50x4 Image use_attnpool" + + " forward with 128x128 input due to insufficient Torch version." + ) + x = torch.zeros(1, 3, 128, 128) + model = clip_resnet50x4_image(pretrained=True, use_attnpool=False) + output = model(x) + self.assertEqual(list(output.shape), [1, 2560, 4, 4]) + self.assertFalse(model.use_attnpool) + def test_clip_resnet50x4_image_forward_cuda(self) -> None: if version.parse(torch.__version__) <= version.parse("1.6.0"): raise unittest.SkipTest( @@ -121,11 +147,12 @@ def test_clip_resnet50x4_image_forward_cuda(self) -> None: + " not supporting CUDA." 
) x = torch.zeros(1, 3, 288, 288).cuda() - model = clip_resnet50x4_image(pretrained=True).cuda() + model = clip_resnet50x4_image(pretrained=True, use_attnpool=True).cuda() output = model(x) self.assertTrue(output.is_cuda) self.assertEqual(list(output.shape), [1, 640]) + self.assertTrue(model.use_attnpool) def test_clip_resnet50x4_image_jit_module_no_redirected_relu(self) -> None: if version.parse(torch.__version__) <= version.parse("1.8.0"): @@ -135,11 +162,12 @@ def test_clip_resnet50x4_image_jit_module_no_redirected_relu(self) -> None: ) x = torch.zeros(1, 3, 288, 288) model = clip_resnet50x4_image( - pretrained=True, replace_relus_with_redirectedrelu=False + pretrained=True, replace_relus_with_redirectedrelu=False, use_attnpool=True ) jit_model = torch.jit.script(model) output = jit_model(x) self.assertEqual(list(output.shape), [1, 640]) + self.assertTrue(model.use_attnpool) def test_clip_resnet50x4_image_jit_module_with_redirected_relu(self) -> None: if version.parse(torch.__version__) <= version.parse("1.8.0"): @@ -149,8 +177,9 @@ def test_clip_resnet50x4_image_jit_module_with_redirected_relu(self) -> None: ) x = torch.zeros(1, 3, 288, 288) model = clip_resnet50x4_image( - pretrained=True, replace_relus_with_redirectedrelu=True + pretrained=True, replace_relus_with_redirectedrelu=True, use_attnpool=True ) jit_model = torch.jit.script(model) output = jit_model(x) self.assertEqual(list(output.shape), [1, 640]) + self.assertTrue(model.use_attnpool) From a0ee122e35ac5733b8ef0a2417a589253fe43eac Mon Sep 17 00:00:00 2001 From: Meghpal <40922889+Meghpal@users.noreply.github.com> Date: Wed, 10 Aug 2022 13:09:23 -0700 Subject: [PATCH 113/174] Notebook support for tqdm (#1001) Summary: The `tqdm` progress in notebooks breaks in some cases (for me this behavior was persistent after I stopped the cell running this even once), possibly because it is not imported from the **recommmended** `tqdm.auto` ```python from tqdm import tqdm ``` 
![image](https://user-images.githubusercontent.com/40922889/181494132-739f7097-1f86-4a3b-9089-d5cf650a84b3.png) However, when imported from `tqdm.auto` it works flawlessly: ```python from tqdm.auto import tqdm ``` ![image](https://user-images.githubusercontent.com/40922889/181494202-349666fd-cc89-42c7-b59c-6e9ad9967f03.png) Pull Request resolved: https://github.com/pytorch/captum/pull/1001 Reviewed By: 99warriors Differential Revision: D38282900 Pulled By: aobo-y fbshipit-source-id: bc4bd9b4e4d5b7ae2538186e0d9bfbaf730ef116 --- captum/_utils/progress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/captum/_utils/progress.py b/captum/_utils/progress.py index 2ece45ad99..435a08a19f 100644 --- a/captum/_utils/progress.py +++ b/captum/_utils/progress.py @@ -6,7 +6,7 @@ from typing import cast, Iterable, Sized, TextIO try: - from tqdm import tqdm + from tqdm.auto import tqdm except ImportError: tqdm = None From 03cea17d9ce73bf0abd623dc6f92204c84e3340b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 11 Aug 2022 09:32:14 -0600 Subject: [PATCH 114/174] callable -> Callable --- captum/optim/_core/loss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_core/loss.py b/captum/optim/_core/loss.py index 6ec08391b1..ffd7c8e43d 100644 --- a/captum/optim/_core/loss.py +++ b/captum/optim/_core/loss.py @@ -319,7 +319,7 @@ def __init__( """ Args: - loss_fn (callable): A function that takes a dict of captured activations + loss_fn (Callable): A function that takes a dict of captured activations with nn.Modules as keys, and then passes those activations through loss objective(s) & math operations. name (str, optional): The name of all composable operations in the @@ -1142,7 +1142,7 @@ def sum_loss_list( Args: loss_list (list): A list of loss objectives. 
- to_scalar_fn (callable): A function for converting loss objective outputs to + to_scalar_fn (Callable): A function for converting loss objective outputs to scalar values, in order to prevent size mismatches. Set to :class:`torch.nn.Identity` for no reduction op. Default: :func:`torch.mean` From a93a5cd5b0bde67107b3c3c02f64b704c727809d Mon Sep 17 00:00:00 2001 From: Narine Kokhlikyan Date: Thu, 11 Aug 2022 20:03:38 -0700 Subject: [PATCH 115/174] Add gpu support to tracincp rand projection (#969) Summary: Adds gpu support to tracincp rand projection. Cleaned up un-passed args to _load_flexible_state_dict Pull Request resolved: https://github.com/pytorch/captum/pull/969 Reviewed By: 99warriors Differential Revision: D38401980 Pulled By: NarineK fbshipit-source-id: 69c9aba4191bc929f150e24ac4e04d7a720d5d6f --- .../_core/tracincp_fast_rand_proj.py | 62 +++++++++---- captum/influence/_utils/common.py | 26 +++--- ...l.py => test_tracin_k_most_influential.py} | 93 ++++++++++++------- .../influence/_core/test_tracin_regression.py | 49 +++++++--- .../_core/test_tracin_self_influence.py | 46 ++++++--- tests/influence/_core/test_tracin_xor.py | 90 ++++++++++++------ tests/influence/_utils/common.py | 67 ++++++++++--- 7 files changed, 298 insertions(+), 135 deletions(-) rename tests/influence/_core/{test_tracin_get_k_most_influential.py => test_tracin_k_most_influential.py} (50%) diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index f42dbd1527..7206812042 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -1,11 +1,15 @@ #!/usr/bin/env python3 +import threading import warnings -from typing import Any, Callable, Iterator, List, Optional, Tuple, Union +from collections import defaultdict +from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Tuple, Union import torch -from captum._utils.common import _format_inputs, 
_get_module_from_name +from captum._utils.common import _format_inputs, _get_module_from_name, _sort_key_list +from captum._utils.gradient import _gather_distributed_tensors from captum._utils.progress import progress + from captum.influence._core.tracincp import ( _influence_route_to_helpers, KMostInfluentialResults, @@ -25,19 +29,10 @@ NearestNeighbors, ) from captum.log import log_usage -from torch import Tensor +from torch import device, Tensor from torch.nn import Module from torch.utils.data import DataLoader, Dataset -layer_inputs = [] - - -def _capture_inputs(layer: Module, input: Tensor, output: Tensor) -> None: - r"""Save activations into layer.activations in forward pass""" - - layer_inputs.append(input[0].detach()) - - r""" Implements abstract DataInfluence class and also provides implementation details for influence computation based on the logic provided in TracIn paper @@ -713,10 +708,26 @@ def _basic_computation_tracincp_fast( targets (tensor): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. 
""" - global layer_inputs - layer_inputs = [] + layer_inputs: Dict[device, Tuple[Tensor, ...]] = defaultdict() + lock = threading.Lock() + + def hook_wrapper(original_module): + def _capture_inputs(layer, input, output) -> None: + r"""Save activations into layer_inputs in forward pass""" + with lock: + is_eval_tuple = isinstance(input, tuple) + if is_eval_tuple: + layer_inputs_val = tuple(inp.detach() for inp in input) + else: + layer_inputs_val = input.detach() + layer_inputs[layer_inputs_val[0].device] = layer_inputs_val + + return _capture_inputs + assert isinstance(influence_instance.final_fc_layer, Module) - handle = influence_instance.final_fc_layer.register_forward_hook(_capture_inputs) + handle = influence_instance.final_fc_layer.register_forward_hook( + hook_wrapper(influence_instance.final_fc_layer) + ) out = influence_instance.model(*inputs) assert influence_instance.loss_fn is not None, "loss function is required" @@ -732,7 +743,16 @@ def _basic_computation_tracincp_fast( influence_instance.reduction_type, ) handle.remove() - _layer_inputs = layer_inputs[0] + + device_ids = cast( + Union[None, List[int]], + influence_instance.model.device_ids + if hasattr(influence_instance.model, "device_ids") + else None, + ) + key_list = _sort_key_list(list(layer_inputs.keys()), device_ids) + + _layer_inputs = _gather_distributed_tensors(layer_inputs, key_list=key_list)[0] assert len(input_jacobians.shape) == 2 @@ -1242,6 +1262,7 @@ def _set_projections_tracincp_fast_rand_proj( layer_input_dim = batch_layer_inputs.shape[ 1 ] # this is the dimension of the input of the last fully-connected layer + device = batch_jacobians.device # choose projection if needed # without projection, the dimension of the intermediate quantities returned @@ -1270,7 +1291,9 @@ def _set_projections_tracincp_fast_rand_proj( 1.0 / layer_input_projection_dim**0.5, ) - projection_quantities = jacobian_projection, layer_input_projection + projection_quantities = jacobian_projection.to( + device + 
), layer_input_projection.to(device) return projection_quantities @@ -1341,9 +1364,8 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( # each element in this list will be of shape (batch_size, projection_dim) checkpoint_projections: List[Any] = [[] for _ in self.checkpoints] - if projection_quantities is None: - project = False - else: + project = False + if projection_quantities is not None: project = True jacobian_projection, layer_input_projection = projection_quantities diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index 131f8964b8..4954944527 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -6,6 +6,7 @@ import torch import torch.nn as nn from captum._utils.progress import progress + from torch import Tensor from torch.nn import Module from torch.utils.data import DataLoader, Dataset @@ -55,7 +56,6 @@ def _gradient_dot_product( total = _tensor_batch_dot(*next(iterator)) for input_grad, src_grad in iterator: total += _tensor_batch_dot(input_grad, src_grad) - total = torch.Tensor(total) return total @@ -141,9 +141,7 @@ def _jacobian_loss_wrt_inputs( return input_jacobians -def _load_flexible_state_dict( - model: Module, path: str, device_ids: str = "cpu", keyname: Optional[str] = None -) -> int: +def _load_flexible_state_dict(model: Module, path: str) -> float: r""" Helper to load pytorch models. This function attempts to find compatibility for loading models that were trained on different devices / with DataParallel but are @@ -156,21 +154,15 @@ def _load_flexible_state_dict( Args: model: The model for which to load a checkpoint path: The filepath to the checkpoint - keyname: The key under which the model state_dict is stored, if any. The module state_dict is modified in-place, and the learning rate is returned. 
""" - device = device_ids - - checkpoint = torch.load(path, map_location=device) + checkpoint = torch.load(path) - learning_rate = checkpoint.get("learning_rate", 1) + learning_rate = checkpoint.get("learning_rate", 1.0) # can get learning rate from optimizer state_dict? - if keyname is not None: - checkpoint = checkpoint[keyname] - if "module." in next(iter(checkpoint)): if isinstance(model, nn.DataParallel): model.load_state_dict(checkpoint) @@ -288,9 +280,15 @@ def _get_k_most_influential_helper( num_instances_processed += batch_size # combine the top-k for the batch with those for previously seen batches - topk_indices = torch.cat([topk_indices, batch_topk_indices], dim=1) + topk_indices = torch.cat( + [topk_indices.to(batch_topk_indices.device), batch_topk_indices], dim=1 + ) topk_tracin_scores = torch.cat( - [topk_tracin_scores, batch_topk_tracin_scores], dim=1 + [ + topk_tracin_scores.to(batch_topk_tracin_scores.device), + batch_topk_tracin_scores, + ], + dim=1, ) # retain only the top-k in terms of tracin_scores diff --git a/tests/influence/_core/test_tracin_get_k_most_influential.py b/tests/influence/_core/test_tracin_k_most_influential.py similarity index 50% rename from tests/influence/_core/test_tracin_get_k_most_influential.py rename to tests/influence/_core/test_tracin_k_most_influential.py index 017562d3d6..5512387e06 100644 --- a/tests/influence/_core/test_tracin_get_k_most_influential.py +++ b/tests/influence/_core/test_tracin_k_most_influential.py @@ -18,42 +18,55 @@ class TestTracInGetKMostInfluential(BaseTest): - """ - This test constructs a random BasicLinearNet, and checks that the proponents - obtained by calling `influence` and sorting are equal to the proponents - obtained by calling `_get_k_most_influential`. Those calls are made through - the calls to wrapper method `influence`. 
- """ + + use_gpu_list = ( + [True, False] + if torch.cuda.is_available() and torch.cuda.device_count() != 0 + else [False] + ) + + param_list = [] + for (batch_size, k) in [(4, 7), (7, 4), (40, 5), (5, 40), (40, 45)]: + for unpack_inputs in [True, False]: + for proponents in [True, False]: + for use_gpu in use_gpu_list: + for reduction, constr in [ + ("none", DataInfluenceConstructor(TracInCP)), + ( + "sum", + DataInfluenceConstructor( + TracInCP, + name="TracInCPFastRandProjTests", + sample_wise_grads_per_batch=True, + ), + ), + ("sum", DataInfluenceConstructor(TracInCPFast)), + ("sum", DataInfluenceConstructor(TracInCPFastRandProj)), + ("mean", DataInfluenceConstructor(TracInCPFast)), + ("mean", DataInfluenceConstructor(TracInCPFastRandProj)), + ]: + if not ( + "sample_wise_grads_per_batch" in constr.kwargs + and constr.kwargs["sample_wise_grads_per_batch"] + and use_gpu + ): + param_list.append( + ( + reduction, + constr, + unpack_inputs, + proponents, + batch_size, + k, + use_gpu, + ) + ) @parameterized.expand( - [ - (reduction, constr, unpack_inputs, proponents, batch_size, k) - # calls test helper method `test_tracin_get_k_most_influential` for several - # combinations of `batch_size` and `k`. This is important because the - # behavior of `_get_k_most_influential` depends on whether `k` is larger - # than `batch_size`. 
- for (batch_size, k) in [(4, 7), (7, 4), (40, 5), (5, 40), (40, 45)] - for unpack_inputs in [True, False] - for proponents in [True, False] - for reduction, constr in [ - ("none", DataInfluenceConstructor(TracInCP)), - ( - "sum", - DataInfluenceConstructor( - TracInCP, - name="TracInCPFastRandProjTests", - sample_wise_grads_per_batch=True, - ), - ), - ("sum", DataInfluenceConstructor(TracInCPFast)), - ("sum", DataInfluenceConstructor(TracInCPFastRandProj)), - ("mean", DataInfluenceConstructor(TracInCPFast)), - ("mean", DataInfluenceConstructor(TracInCPFastRandProj)), - ] - ], + param_list, name_func=build_test_name_func(), ) - def test_tracin_get_k_most_influential( + def test_tracin_k_most_influential( self, reduction: str, tracin_constructor: Callable, @@ -61,16 +74,26 @@ def test_tracin_get_k_most_influential( proponents: bool, batch_size: int, k: int, + use_gpu: bool, ) -> None: - + """ + This test constructs a random BasicLinearNet, and checks that the proponents + obtained by calling `influence` and sorting are equal to the proponents + obtained by calling `_k_most_influential`. Those calls are made through + the calls to wrapper method `influence`. 
+ """ with tempfile.TemporaryDirectory() as tmpdir: - ( net, train_dataset, test_samples, test_labels, - ) = get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=True) + ) = get_random_model_and_data( + tmpdir, + unpack_inputs, + True, + use_gpu, + ) self.assertTrue(isinstance(reduction, str)) self.assertTrue(callable(tracin_constructor)) diff --git a/tests/influence/_core/test_tracin_regression.py b/tests/influence/_core/test_tracin_regression.py index 7a615d2c9f..262c76d137 100644 --- a/tests/influence/_core/test_tracin_regression.py +++ b/tests/influence/_core/test_tracin_regression.py @@ -12,20 +12,23 @@ from parameterized import parameterized from tests.helpers.basic import assertTensorAlmostEqual, BaseTest from tests.influence._utils.common import ( + _isSorted, + _wrap_model_in_dataparallel, build_test_name_func, CoefficientNet, DataInfluenceConstructor, IdentityDataset, - isSorted, RangeDataset, ) class TestTracInRegression(BaseTest): - def _test_tracin_regression_setup(self, tmpdir: str, features: int): + def _test_tracin_regression_setup( + self, tmpdir: str, features: int, use_gpu: bool = False + ): low = 1 high = 17 - dataset = RangeDataset(low, high, features) + dataset = RangeDataset(low, high, features, use_gpu) net = CoefficientNet(in_features=features) checkpoint_name = "-".join(["checkpoint-reg", "0" + ".pt"]) @@ -35,15 +38,22 @@ def _test_tracin_regression_setup(self, tmpdir: str, features: int): for i, weight in enumerate(weights): net.fc1.weight.data.fill_(weight) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net checkpoint_name = "-".join(["checkpoint-reg", str(i + 1) + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) - return dataset, net + return dataset, net_adjusted - @parameterized.expand( - [ - (reduction, constructor, mode, dim) - for dim in [1, 20] + use_gpu_list = ( + [True, False] + if 
torch.cuda.is_available() and torch.cuda.device_count() != 0 + else [False] + ) + + param_list = [] + + for use_gpu in use_gpu_list: + for dim in [1, 20]: for (mode, reduction, constructor) in [ ("check_idx", "none", DataInfluenceConstructor(TracInCP)), ("sample_wise_trick", None, DataInfluenceConstructor(TracInCP)), @@ -60,8 +70,12 @@ def _test_tracin_regression_setup(self, tmpdir: str, features: int): projection_dim=1, ), ), - ] - ], + ]: + if not (mode == "sample_wise_trick" and use_gpu): + param_list.append((reduction, constructor, mode, dim, use_gpu)) + + @parameterized.expand( + param_list, name_func=build_test_name_func(args_to_skip=["reduction"]), ) def test_tracin_regression( @@ -70,12 +84,17 @@ def test_tracin_regression( tracin_constructor: Callable, mode: str, features: int, + use_gpu: bool, ) -> None: with tempfile.TemporaryDirectory() as tmpdir: batch_size = 4 - dataset, net = self._test_tracin_regression_setup(tmpdir, features) + dataset, net = self._test_tracin_regression_setup( + tmpdir, + features, + use_gpu, + ) # and not mode == 'sample_wise_trick' # check influence scores of training data @@ -85,6 +104,10 @@ def test_tracin_regression( test_inputs = ( torch.arange(17, 33, dtype=torch.float).unsqueeze(1).repeat(1, features) ) + + if use_gpu: + test_inputs = test_inputs.cuda() + test_labels = test_inputs self.assertTrue(callable(tracin_constructor)) @@ -119,7 +142,7 @@ def test_tracin_regression( # check that top influence is one with maximal value # (and hence gradient) for i in range(len(idx)): - self.assertTrue(isSorted(idx[i])) + self.assertTrue(_isSorted(idx[i])) if mode == "sample_wise_trick": diff --git a/tests/influence/_core/test_tracin_self_influence.py b/tests/influence/_core/test_tracin_self_influence.py index 0f327ce3fb..0ddbe17333 100644 --- a/tests/influence/_core/test_tracin_self_influence.py +++ b/tests/influence/_core/test_tracin_self_influence.py @@ -16,34 +16,54 @@ class TestTracInSelfInfluence(BaseTest): - 
@parameterized.expand( - [ - (reduction, constructor, unpack_inputs) - for unpack_inputs in [True, False] + + use_gpu_list = ( + [True, False] + if torch.cuda.is_available() and torch.cuda.device_count() != 0 + else [False] + ) + + param_list = [] + for unpack_inputs in [True, False]: + for use_gpu in use_gpu_list: for (reduction, constructor) in [ ("none", DataInfluenceConstructor(TracInCP)), ( "sum", DataInfluenceConstructor( TracInCP, - name="TracInCPFastRandProjTests", + name="TracInCP_sample_wise_grads_per_batch", sample_wise_grads_per_batch=True, ), ), ("sum", DataInfluenceConstructor(TracInCPFast)), ("mean", DataInfluenceConstructor(TracInCPFast)), - ] - ], + ]: + if not ( + "sample_wise_grads_per_batch" in constructor.kwargs + and constructor.kwargs["sample_wise_grads_per_batch"] + and use_gpu + ): + param_list.append((reduction, constructor, unpack_inputs, use_gpu)) + + @parameterized.expand( + param_list, name_func=build_test_name_func(), ) def test_tracin_self_influence( - self, reduction: str, tracin_constructor: Callable, unpack_inputs: bool + self, + reduction: str, + tracin_constructor: Callable, + unpack_inputs: bool, + use_gpu: bool, ) -> None: with tempfile.TemporaryDirectory() as tmpdir: - ( - net, - train_dataset, - ) = get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=False) + (net, train_dataset,) = get_random_model_and_data( + tmpdir, + unpack_inputs, + False, + use_gpu, + ) # compute tracin_scores of training data on training data criterion = nn.MSELoss(reduction=reduction) @@ -56,8 +76,6 @@ def test_tracin_self_influence( batch_size, criterion, ) - - # calculate influence scores, using the training data as the test batch train_scores = tracin.influence( train_dataset.samples, train_dataset.labels, diff --git a/tests/influence/_core/test_tracin_xor.py b/tests/influence/_core/test_tracin_xor.py index 52a71afcf7..d6f205d790 100644 --- a/tests/influence/_core/test_tracin_xor.py +++ b/tests/influence/_core/test_tracin_xor.py @@ 
-10,6 +10,7 @@ from parameterized import parameterized from tests.helpers.basic import assertTensorAlmostEqual, BaseTest from tests.influence._utils.common import ( + _wrap_model_in_dataparallel, BasicLinearNet, BinaryDataset, build_test_name_func, @@ -18,8 +19,9 @@ class TestTracInXOR(BaseTest): + # TODO: Move test setup to use setUp and tearDown method overrides. - def _test_tracin_xor_setup(self, tmpdir: str): + def _test_tracin_xor_setup(self, tmpdir: str, use_gpu: bool = False): net = BasicLinearNet(2, 2, 1) state = OrderedDict( @@ -34,8 +36,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "0" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -49,8 +53,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "1" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -64,8 +70,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "2" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -79,8 +87,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "3" + ".pt"]) - 
torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -94,8 +104,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "4" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -109,8 +121,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "5" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -124,8 +138,10 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "6" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) state = OrderedDict( [ @@ -139,38 +155,57 @@ def _test_tracin_xor_setup(self, tmpdir: str): ] ) net.load_state_dict(state) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + checkpoint_name = "-".join(["checkpoint", "class", "7" + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) - dataset = BinaryDataset() + dataset = BinaryDataset(use_gpu) - return net, dataset + return net_adjusted, dataset - @parameterized.expand( - [ + parametrized_list = [ + ( + "none", + DataInfluenceConstructor(TracInCP), 
+ "check_idx", + False, + ), + ( + None, + DataInfluenceConstructor(TracInCP), + "sample_wise_trick", + False, + ), + ] + + if torch.cuda.is_available() and torch.cuda.device_count() != 0: + parametrized_list.append( ( "none", DataInfluenceConstructor(TracInCP), "check_idx", - ), - ( - None, - DataInfluenceConstructor(TracInCP), - "sample_wise_trick", - ), - ], + True, + ) + ) + + @parameterized.expand( + parametrized_list, name_func=build_test_name_func(args_to_skip=["reduction"]), ) def test_tracin_xor( - self, reduction: Optional[str], tracin_constructor: Callable, mode: str + self, + reduction: Optional[str], + tracin_constructor: Callable, + mode: str, + use_gpu: bool, ) -> None: with tempfile.TemporaryDirectory() as tmpdir: - dataset = BinaryDataset() - net = BasicLinearNet(2, 2, 1) - + # net = BasicLinearNet(2, 2, 1) + # net = wrap_model_in_dataparallel(net) if use_gpu else net batch_size = 4 - net, dataset = self._test_tracin_xor_setup(tmpdir) + net, dataset = self._test_tracin_xor_setup(tmpdir, use_gpu) testset = F.normalize(torch.empty(100, 2).normal_(mean=0, std=0.5), dim=1) mask = ~torch.logical_xor(testset[:, 0] > 0, testset[:, 1] > 0) @@ -179,6 +214,9 @@ def test_tracin_xor( .unsqueeze(1) .float() ) + if use_gpu: + testset = testset.cuda() + testlabels = testlabels.cuda() self.assertTrue(callable(tracin_constructor)) @@ -196,7 +234,6 @@ def test_tracin_xor( ) test_scores = tracin.influence(testset, testlabels) idx = torch.argsort(test_scores, dim=1, descending=True) - # check that top 5 influences have matching binary classification for i in range(len(idx)): influence_labels = dataset.labels[idx[i][0:5], 0] @@ -225,7 +262,6 @@ def test_tracin_xor( criterion, sample_wise_grads_per_batch=True, ) - test_scores = tracin.influence(testset, testlabels) test_scores_sample_wise_trick = tracin_sample_wise_trick.influence( testset, testlabels diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py index 90f14353c7..3ab018e5b9 100644 --- 
a/tests/influence/_utils/common.py +++ b/tests/influence/_utils/common.py @@ -18,16 +18,33 @@ from torch.utils.data import DataLoader, Dataset -def isSorted(x, key=lambda x: x, descending=True): +def _isSorted(x, key=lambda x: x, descending=True): if descending: return all([key(x[i]) >= key(x[i + 1]) for i in range(len(x) - 1)]) else: return all([key(x[i]) <= key(x[i + 1]) for i in range(len(x) - 1)]) +def _wrap_model_in_dataparallel(net): + alt_device_ids = [0] + [x for x in range(torch.cuda.device_count() - 1, 0, -1)] + net = net.cuda() + return torch.nn.DataParallel(net, device_ids=alt_device_ids) + + +def _move_sample_to_cuda(samples): + return [s.cuda() for s in samples] + + class ExplicitDataset(Dataset): - def __init__(self, samples, labels) -> None: + def __init__(self, samples, labels, use_gpu=False) -> None: self.samples, self.labels = samples, labels + if use_gpu: + self.samples = ( + _move_sample_to_cuda(self.samples) + if isinstance(self.samples, list) + else self.samples.cuda() + ) + self.labels = self.labels.cuda() def __len__(self): return len(self.samples) @@ -37,8 +54,15 @@ def __getitem__(self, idx): class UnpackDataset(Dataset): - def __init__(self, samples, labels) -> None: + def __init__(self, samples, labels, use_gpu=False) -> None: self.samples, self.labels = samples, labels + if use_gpu: + self.samples = ( + _move_sample_to_cuda(self.samples) + if isinstance(self.samples, list) + else self.samples.cuda() + ) + self.labels = self.labels.cuda() def __len__(self): return len(self.samples[0]) @@ -52,23 +76,29 @@ def __getitem__(self, idx): class IdentityDataset(ExplicitDataset): - def __init__(self, num_features) -> None: + def __init__(self, num_features, use_gpu=False) -> None: self.samples = torch.diag(torch.ones(num_features)) self.labels = torch.zeros(num_features).unsqueeze(1) + if use_gpu: + self.samples = self.samples.cuda() + self.labels = self.labels.cuda() class RangeDataset(ExplicitDataset): - def __init__(self, low, high, 
num_features) -> None: + def __init__(self, low, high, num_features, use_gpu=False) -> None: self.samples = ( torch.arange(start=low, end=high, dtype=torch.float) .repeat(num_features, 1) .transpose(1, 0) ) self.labels = torch.arange(start=low, end=high, dtype=torch.float).unsqueeze(1) + if use_gpu: + self.samples = self.samples.cuda() + self.labels = self.labels.cuda() class BinaryDataset(ExplicitDataset): - def __init__(self) -> None: + def __init__(self, use_gpu=False) -> None: self.samples = F.normalize( torch.stack( ( @@ -105,6 +135,7 @@ def __init__(self) -> None: torch.Tensor([-1]).repeat(12, 1), ) ) + super().__init__(self.samples, self.labels, use_gpu) class CoefficientNet(nn.Module): @@ -148,7 +179,9 @@ def forward(self, *inputs): return torch.tanh(self.linear2(x)) -def get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=True): +def get_random_model_and_data( + tmpdir, unpack_inputs, return_test_data=True, use_gpu=False +): in_features, hidden_nodes, out_features = 5, 4, 3 num_inputs = 2 @@ -169,7 +202,8 @@ def get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=True): 3, 4, (in_features, in_features * num_inputs) ) checkpoint_name = "-".join(["checkpoint-reg", str(i + 1) + ".pt"]) - torch.save(net.state_dict(), os.path.join(tmpdir, checkpoint_name)) + net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net + torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name)) num_samples = 50 num_train = 32 @@ -189,15 +223,24 @@ def get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=True): test_samples = all_samples[num_train:] dataset = ( - ExplicitDataset(train_samples, train_labels) + ExplicitDataset(train_samples, train_labels, use_gpu) if not unpack_inputs - else UnpackDataset(train_samples, train_labels) + else UnpackDataset(train_samples, train_labels, use_gpu) ) if return_test_data: - return net, dataset, test_samples, test_labels + return ( + _wrap_model_in_dataparallel(net) if 
use_gpu else net, + dataset, + _move_sample_to_cuda(test_samples) + if isinstance(test_samples, list) and use_gpu + else test_samples.cuda() + if use_gpu + else test_samples, + test_labels.cuda() if use_gpu else test_labels, + ) else: - return net, dataset + return _wrap_model_in_dataparallel(net) if use_gpu else net, dataset class DataInfluenceConstructor: From 9263ae17a946722d15a1a052e85659d2abdc06aa Mon Sep 17 00:00:00 2001 From: Fulton Wang Date: Tue, 16 Aug 2022 20:09:36 -0700 Subject: [PATCH 116/174] update TracInCP tutorial to use `train_dataset` argument Summary: The API of TracIn was changed so that for all implementations, the initialization argument for the training dataset is now called `train_dataset` instead of `influence_src_dataset`. In this diff, the TracIn tutorial is updated to reflect this change. It just involves changing the named argument in TracIn constructors. Reviewed By: NarineK Differential Revision: D38378555 fbshipit-source-id: dddb4e320b094223964cb8d048821e9aa45281fe --- tutorials/TracInCP_Tutorial.ipynb | 64 ++++++++----------------------- 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/tutorials/TracInCP_Tutorial.ipynb b/tutorials/TracInCP_Tutorial.ipynb index bcfbe60a77..e8a0e112a7 100644 --- a/tutorials/TracInCP_Tutorial.ipynb +++ b/tutorials/TracInCP_Tutorial.ipynb @@ -691,7 +691,7 @@ "tracin_cp_fast = TracInCPFast(\n", " model=net,\n", " final_fc_layer=list(net.children())[-1],\n", - " influence_src_dataset=correct_dataset,\n", + " train_dataset=correct_dataset,\n", " checkpoints=correct_dataset_checkpoint_paths,\n", " checkpoints_load_func=checkpoints_load_func,\n", " loss_fn=nn.CrossEntropyLoss(reduction=\"sum\"),\n", @@ -718,7 +718,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "code_folding": [], "customInput": null, @@ -729,15 +729,7 @@ "requestMsgId": "3d109a69-eb97-45bf-9682-f336b5eeffd3", "showInput": true }, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "Computed proponents / opponents over a dataset of 50000 examples in 1.22 minutes\n" - ] - } - ], + "outputs": [], "source": [ "k = 10\n", "start_time = datetime.datetime.now()\n", @@ -1160,7 +1152,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "code_folding": [], "customInput": null, @@ -1171,22 +1163,14 @@ "requestMsgId": "495ed6b5-183c-475b-b26e-a38270c51779", "showInput": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Performed pre-processing of a dataset of 50000 examples in 5.92 minutes\n" - ] - } - ], + "outputs": [], "source": [ "from captum.influence._utils.nearest_neighbors import AnnoyNearestNeighbors\n", "start_time = datetime.datetime.now()\n", "tracin_cp_fast_rand_proj = TracInCPFastRandProj(\n", " model=net,\n", " final_fc_layer=list(net.children())[-1],\n", - " influence_src_dataset=correct_dataset,\n", + " train_dataset=correct_dataset,\n", " checkpoints=correct_dataset_checkpoint_paths,\n", " checkpoints_load_func=checkpoints_load_func,\n", " loss_fn=nn.CrossEntropyLoss(reduction=\"sum\"),\n", @@ -1217,7 +1201,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "code_folding": [], "executionStopTime": 1645988498023, @@ -1225,15 +1209,7 @@ "originalKey": "d06f872f-d82c-4369-b0f8-8043551279f7", "requestMsgId": "d06f872f-d82c-4369-b0f8-8043551279f7" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Computed proponents / opponents over a dataset of 50000 examples in 0.01 minutes\n" - ] - } - ], + "outputs": [], "source": [ "k = 10\n", "start_time = datetime.datetime.now()\n", @@ -1663,7 +1639,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "code_folding": [], "customInput": null, @@ -1674,15 +1650,7 @@ "requestMsgId": "d7a7e6d1-119f-4703-981a-62b1e1374513", "showInput": true }, - "outputs": [ - { 
- "name": "stdout", - "output_type": "stream", - "text": [ - "Generated incorrect labels in 0.52 minutes\n" - ] - } - ], + "outputs": [], "source": [ "start_time = datetime.datetime.now()\n", "incorrect_labels = []\n", @@ -1919,7 +1887,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": { "code_folding": [], "customInput": null, @@ -1963,7 +1931,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": { "code_folding": [], "executionStartTime": 1646067341246, @@ -1977,7 +1945,7 @@ "tracin_cp_fast = TracInCPFast(\n", " model=net,\n", " final_fc_layer=list(net.children())[-1],\n", - " influence_src_dataset=mislabelled_dataset,\n", + " train_dataset=mislabelled_dataset,\n", " checkpoints=mislabelled_dataset_checkpoint_paths,\n", " checkpoints_load_func=checkpoints_load_func,\n", " loss_fn=nn.CrossEntropyLoss(reduction=\"sum\"),\n", @@ -2000,7 +1968,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "metadata": { "code_folding": [], "executionStartTime": 1646067346865, @@ -2014,7 +1982,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "computed self influence scores for 50000 examples in 0.59 minutes\n" + "computed self influence scores for 50000 examples in 0.48 minutes\n" ] } ], @@ -2042,7 +2010,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 33, "metadata": { "code_folding": [], "executionStartTime": 1646067380564, From 37516c17e20be8c3aec84495d31a3c374e178cc8 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Wed, 17 Aug 2022 11:46:26 -0700 Subject: [PATCH 117/174] Improve version checking (#999) Summary: Without the packaging library, statements like: `"1.8.0" > "1.10.0"` will be equal to True, despite v1.10 being a later version that v1.8.0. The `packaging` library will in some cases not be already installed on a user's device, so I've also added it the `setup.py` file. 
It's one of the core libraries from the Python Packaging Authority, but it's not included with the base Python installation: https://packaging.python.org/en/latest/key_projects/#pypa-projects This wasn't an issue in https://github.com/pytorch/captum/pull/940 as one of the libraries in dev install has `packaging` as a dependency. So, there's no error when the tests are using the `packaging` library. Pull Request resolved: https://github.com/pytorch/captum/pull/999 Reviewed By: vivekmig Differential Revision: D38693600 Pulled By: NarineK fbshipit-source-id: a5ea5ef6f2ca175d60f1638072add2fea6d31091 --- captum/_utils/common.py | 23 ++++++++++- .../influence/_core/similarity_influence.py | 2 +- captum/influence/_utils/common.py | 3 +- tests/utils/test_common.py | 40 ++++++++++++++++++- 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/captum/_utils/common.py b/captum/_utils/common.py index 6db0727024..1bad602896 100644 --- a/captum/_utils/common.py +++ b/captum/_utils/common.py @@ -18,6 +18,27 @@ from torch.nn import Module +def _parse_version(v: str) -> Tuple[int, ...]: + """ + Parse version strings into tuples for comparison. + + Versions should be in the form of "<major>.<minor>.<patch>", "<major>.<minor>", + or "<major>". The "dev", "post" and other letter portions of the given version will + be ignored. + + Args: + + v (str): A version string. + + Returns: + version_tuple (tuple of int): A tuple of integer values to use for version + comparison.
+ """ + v = [n for n in v.split(".") if n.isdigit()] + assert v != [] + return tuple(map(int, v)) + + class ExpansionTypes(Enum): repeat = 1 repeat_interleave = 2 @@ -671,7 +692,7 @@ def _register_backward_hook( ): return module.register_backward_hook(hook) - if torch.__version__ >= "1.9": + if _parse_version(torch.__version__) >= (1, 9, 0): # Only supported for torch >= 1.9 return module.register_full_backward_hook(hook) else: diff --git a/captum/influence/_core/similarity_influence.py b/captum/influence/_core/similarity_influence.py index 83cb2966fa..0fd21eedb7 100644 --- a/captum/influence/_core/similarity_influence.py +++ b/captum/influence/_core/similarity_influence.py @@ -40,7 +40,7 @@ def cosine_similarity(test, train, replace_nan=0) -> Tensor: test = test.view(test.shape[0], -1) train = train.view(train.shape[0], -1) - if torch.__version__ <= "1.6.0": + if common._parse_version(torch.__version__) <= (1, 6, 0): test_norm = torch.norm(test, p=None, dim=1, keepdim=True) train_norm = torch.norm(train, p=None, dim=1, keepdim=True) else: diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index 4954944527..cd989098c8 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn +from captum._utils.common import _parse_version from captum._utils.progress import progress from torch import Tensor @@ -126,7 +127,7 @@ def _jacobian_loss_wrt_inputs( "Must be either 'sum' or 'mean'." 
) - if torch.__version__ >= "1.8": + if _parse_version(torch.__version__) >= (1, 8, 0): input_jacobians = torch.autograd.functional.jacobian( lambda out: loss_fn(out, targets), out, vectorize=vectorize ) diff --git a/tests/utils/test_common.py b/tests/utils/test_common.py index 5bea797e97..e19c3c26b9 100644 --- a/tests/utils/test_common.py +++ b/tests/utils/test_common.py @@ -3,7 +3,13 @@ from typing import cast, List, Tuple import torch -from captum._utils.common import _reduce_list, _select_targets, _sort_key_list, safe_div +from captum._utils.common import ( + _parse_version, + _reduce_list, + _select_targets, + _sort_key_list, + safe_div, +) from tests.helpers.basic import assertTensorAlmostEqual, BaseTest @@ -109,3 +115,35 @@ def test_select_target_3d(self) -> None: # Verify error is raised if too many dimensions are provided. with self.assertRaises(AssertionError): _select_targets(output_tensor, (1, 2, 3)) + + +class TestParseVersion(BaseTest): + def test_parse_version_dev(self) -> None: + version_str = "1.12.0.dev20201109" + output = _parse_version(version_str) + self.assertEqual(output, (1, 12, 0)) + + def test_parse_version_post(self) -> None: + version_str = "1.3.0.post2" + output = _parse_version(version_str) + self.assertEqual(output, (1, 3, 0)) + + def test_parse_version_1_12_0(self) -> None: + version_str = "1.12.0" + output = _parse_version(version_str) + self.assertEqual(output, (1, 12, 0)) + + def test_parse_version_1_12_2(self) -> None: + version_str = "1.12.2" + output = _parse_version(version_str) + self.assertEqual(output, (1, 12, 2)) + + def test_parse_version_1_6_0(self) -> None: + version_str = "1.6.0" + output = _parse_version(version_str) + self.assertEqual(output, (1, 6, 0)) + + def test_parse_version_1_12(self) -> None: + version_str = "1.12" + output = _parse_version(version_str) + self.assertEqual(output, (1, 12)) From 312acd85a0fa07c4b00b64c07520b918e04a8ff0 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 19 Aug 2022 09:36:40 
-0700 Subject: [PATCH 118/174] Fix failing conda test, & fix a GPU test bug (#1009) Summary: I think that the number of Conda install timeouts can be lessened by removing unnecessary installs: * Sphinx is not used for the Conda tests, so it doesn't make sense to install it. * NumPy is a dependency of PyTorch, so there is no need to install it after installing PyTorch. The `nodejs` install seems to be what takes up the most time, and I think it's where the solver sometimes fails. Using the libmamba solver seems to prevent it from failing, but it still takes the vast majority of the install time (around 20 minutes) to install `nodejs`. https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community I also noticed a recurring error in the GPU tests, and implemented a fix for it: ``` Errors were encountered while processing: sane-utils W: --force-yes is deprecated, use one of the options starting with --allow instead. E: Sub-process /usr/bin/dpkg returned an error code (1) ``` This PR along with https://github.com/pytorch/captum/issues/1007 should fix the test failures that Captum is experiencing.
vivekmig Pull Request resolved: https://github.com/pytorch/captum/pull/1009 Reviewed By: NarineK Differential Revision: D38786344 Pulled By: vivekmig fbshipit-source-id: 508aba387f62302053945b9eb1c94d51a7c27915 --- .circleci/config.yml | 1 + scripts/install_via_conda.sh | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a3cccbf9b7..1e74e5e328 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -121,6 +121,7 @@ commands: sudo dpkg -i cuda-repo-ubuntu2004-11-4-local_11.4.2-470.57.02-1_amd64.deb sudo apt-key add /var/cuda-repo-ubuntu2004-11-4-local/7fa2af80.pub sudo apt-get update + sudo dpkg --configure -a sudo apt-get --yes --force-yes install cuda jobs: diff --git a/scripts/install_via_conda.sh b/scripts/install_via_conda.sh index a8e32b8d22..88a9603ade 100755 --- a/scripts/install_via_conda.sh +++ b/scripts/install_via_conda.sh @@ -21,6 +21,10 @@ conda update --all --yes # required to use conda develop conda install -y conda-build +# Use faster conda solver +conda install -n base conda-libmamba-solver +conda config --set experimental_solver libmamba + # install other frameworks if asked for and make sure this is before pytorch if [[ $FRAMEWORKS == true ]]; then pip install pytext-nlp @@ -35,10 +39,13 @@ else fi # install other deps -conda install -y numpy sphinx pytest flake8 ipywidgets ipython scikit-learn parameterized -conda install -y -c conda-forge matplotlib pytest-cov sphinx-autodoc-typehints mypy flask flask-compress +# conda install -y numpy sphinx pytest flake8 ipywidgets ipython scikit-learn parameterized +# conda install -y -c conda-forge matplotlib pytest-cov sphinx-autodoc-typehints mypy flask flask-compress +conda install -y pytest flake8 ipywidgets ipython scikit-learn parameterized +conda install -y -c conda-forge matplotlib pytest-cov mypy flask flask-compress + # deps not available in conda -pip install sphinxcontrib-katex +# pip install 
sphinxcontrib-katex # install node/yarn for insights build conda install -y -c conda-forge yarn From dbb5a31847d6abedc525d94ba007012645b62b00 Mon Sep 17 00:00:00 2001 From: Diamond Bishop Date: Tue, 23 Aug 2022 09:39:12 -0700 Subject: [PATCH 119/174] Fixed bug in House_Prices_Regression_Interpret tutorial (#1014) Summary: Described in issue: [1012](https://github.com/pytorch/captum/issues/1012) **Background** There's a line that must have been edited/added at some point that assumes more than one tensor is being returned from lc.attribute, but there's only one (since only one tensor is passed in): lc_attr_test = lc.attribute(X_test, n_steps=100, attribute_to_layer_input=True) # shape: test_examples x size_hidden lc_attr_test = lc_attr_test[0] The second line here of setting lc_attr_test to the 0th index, then sets it to the tensor index instead of the first tensor, which in turn in the next cell means that "lc_attr_test.shape[1]" throws an exception. **Changes** Fix is to just take out the reassigning of "lc_attr_test = lc_attr_test[0]". **Testing** Tested running the notebook before the change (which showed the error described) and after, which produced the plot which was originally shown in the static tutorial page and matches expectations now (see attachment) Screen Shot 2022-08-18 at 9 59 25 PM .
Pull Request resolved: https://github.com/pytorch/captum/pull/1014 Reviewed By: vivekmig Differential Revision: D38927725 Pulled By: dbish fbshipit-source-id: bda2c0a98638ea1b0f5dc15f8c249985fc890cf7 --- .../House_Prices_Regression_Interpret.ipynb | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tutorials/House_Prices_Regression_Interpret.ipynb b/tutorials/House_Prices_Regression_Interpret.ipynb index aee3cfc3a0..497ad58987 100644 --- a/tutorials/House_Prices_Regression_Interpret.ipynb +++ b/tutorials/House_Prices_Regression_Interpret.ipynb @@ -128,14 +128,12 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -430,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -463,14 +461,12 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -562,16 +558,15 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Compute the attributions of the output with respect to the inputs of the fourth linear layer\n", "lc = LayerConductance(model, model.lin4)\n", - "lc_attr_test = lc.attribute(X_test, n_steps=100, attribute_to_layer_input=True)\n", "\n", "# shape: test_examples x size_hidden\n", - "lc_attr_test = lc_attr_test[0]\n", + "lc_attr_test = lc.attribute(X_test, n_steps=100, attribute_to_layer_input=True)\n", "\n", "# weights from forth linear layer\n", "# shape: size_hidden4 x size_hidden3\n", @@ -588,19 +583,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -646,7 +639,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.9.13 ('venv')", "language": "python", "name": "python3" }, @@ -660,7 +653,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "4311c7dda575c081001492aac26d536ae97e4c13a1d6ad5cc980ffae203d70d8" + } } }, "nbformat": 4, From 656528041b60d6daa06f07f77b9621a4055c7142 Mon Sep 17 00:00:00 2001 From: Fulton Wang Date: Tue, 23 Aug 2022 12:45:39 -0700 Subject: [PATCH 120/174] change tracin progress test (#1007) Summary: Pull Request resolved: https://github.com/pytorch/captum/pull/1007 the tests for `test_tracin_show_progress` were failing we check that the progress reaches 100% X times, but sometimes, the progress reaches 100% more than X times. As aobo-y pointed out, this is because tqdm will sometimes correct its estimate of it/s for the total iteration over progress, and print 100% an additional time, with the updated estimate of it/s. The fix is to check that progress reaches 100% at least X times. We don't check that it reaches 100% either X or X+1 times, because all we can reasonably assume of tqdm is that it may re-estimate it/s >= 0 times. Note that this change is to correct a problem with tqdm, *not* `SimpleProgress`. 
Reviewed By: NarineK Differential Revision: D38443861 fbshipit-source-id: 3d6b8588380014e4e6e4cf8e0dfd5464c50ce7be --- .../_core/test_tracin_show_progress.py | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tests/influence/_core/test_tracin_show_progress.py b/tests/influence/_core/test_tracin_show_progress.py index e940e2ed66..429ac88972 100644 --- a/tests/influence/_core/test_tracin_show_progress.py +++ b/tests/influence/_core/test_tracin_show_progress.py @@ -29,17 +29,35 @@ class TestTracInShowProgress(BaseTest): in `TracInCPFastRandProj.__init__`). """ - def _check_error_msg_multiplicity(self, mock_stderr, msg, msg_multiplicity): + def _check_error_msg_multiplicity( + self, + mock_stderr: io.StringIO, + msg: str, + msg_multiplicity: int, + greater_than: bool = True, + ): """ - checks that in `mock_stderr`, the error msg `msg` occurs `msg_multiplicity` - times + Checks that in `mock_stderr`, the error msg `msg` occurs `msg_multiplicity` + times. If 'greater_than' is true, it checks that the `msg` occurs at least + `msg_multiplicity` times. Otherwise, it checks that `msg` occurs exactly + `msg_multiplicity` times. The reason to let `greater_than` as true by default + is that tqdm sometimes displays the "100%" more than once for each progress bar + because it may want to correct its estimation of it/s. In this case, the + tqdm could remove the original "100%" and then re-display "100%" with the + updated estimate of it/s. 
""" output = mock_stderr.getvalue() - self.assertEqual( - output.count(msg), - msg_multiplicity, - f"Error in progress of batches with output: {repr(output)}", - ) + actual_msg_multiplicity = output.count(msg) + assert isinstance(actual_msg_multiplicity, int) + error_msg = f"Error in progress of batches with output: {repr(output)}" + if greater_than: + self.assertTrue(actual_msg_multiplicity - msg_multiplicity >= 0, error_msg) + else: + self.assertEqual( + actual_msg_multiplicity, + msg_multiplicity, + error_msg, + ) @parameterized.expand( [ From 12a847b942121589a5d7ee832ee168b9ffbcb0a3 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 26 Aug 2022 18:02:56 -0700 Subject: [PATCH 121/174] Added missing `-> None:` type hint to applicable tests (#1006) Summary: I noticed that some of the tests were missing the `-> None:` type hint. This PR adds the missing type hint. Pull Request resolved: https://github.com/pytorch/captum/pull/1006 Reviewed By: NarineK Differential Revision: D39063574 Pulled By: aobo-y fbshipit-source-id: 90478bb5782bc643e87441529100c8bbc0dc71c7 --- tests/attr/layer/test_layer_lrp.py | 16 +++++------ tests/attr/models/test_base.py | 6 ++--- tests/attr/models/test_pytext.py | 6 ++--- tests/attr/test_approximation_methods.py | 27 ++++++++++--------- tests/attr/test_class_summarizer.py | 4 +-- tests/attr/test_stat.py | 6 ++--- tests/attr/test_summarizer.py | 4 +-- tests/attr/test_utils_batching.py | 10 +++---- .../_core/test_similarity_influence.py | 2 +- tests/influence/_utils/common.py | 6 ++--- tests/insights/test_contribution.py | 6 ++--- tests/insights/test_features.py | 12 ++++----- tests/utils/test_av.py | 2 +- tests/utils/test_common.py | 10 +++---- tests/utils/test_linear_model.py | 10 +++---- 15 files changed, 64 insertions(+), 63 deletions(-) diff --git a/tests/attr/layer/test_layer_lrp.py b/tests/attr/layer/test_layer_lrp.py index 3fc8cd80ea..e4ad951ace 100644 --- a/tests/attr/layer/test_layer_lrp.py +++ 
b/tests/attr/layer/test_layer_lrp.py @@ -39,12 +39,12 @@ def forward(self, input): class Test(BaseTest): - def test_lrp_creator(self): + def test_lrp_creator(self) -> None: model, _ = _get_basic_config() model.conv1.rule = 1 self.assertRaises(TypeError, LayerLRP, model, model.conv1) - def test_lrp_creator_activation(self): + def test_lrp_creator_activation(self) -> None: model, inputs = _get_basic_config() model.add_module("sigmoid", nn.Sigmoid()) lrp = LayerLRP(model, model.conv1) @@ -77,7 +77,7 @@ def test_lrp_simple_attributions(self): assertTensorAlmostEqual(self, relevance_lower[0], relevance_upper[0]) self.assertEqual(delta.item(), 0) - def test_lrp_simple_repeat_attributions(self): + def test_lrp_simple_repeat_attributions(self) -> None: model, inputs = _get_simple_model() model.eval() model.linear.rule = GammaRule() @@ -88,7 +88,7 @@ def test_lrp_simple_repeat_attributions(self): output_after = model(inputs) assertTensorAlmostEqual(self, output, output_after) - def test_lrp_simple_tanh(self): + def test_lrp_simple_tanh(self) -> None: class Model(nn.Module): def __init__(self) -> None: super(Model, self).__init__() @@ -109,7 +109,7 @@ def forward(self, x): self, relevance[0], torch.Tensor([0.0537, 0.0537, 0.0537]) ) # Result if tanh is skipped for propagation - def test_lrp_simple_attributions_GammaRule(self): + def test_lrp_simple_attributions_GammaRule(self) -> None: model, inputs = _get_simple_model() with torch.no_grad(): model.linear.weight.data[0][0] = -2 @@ -120,7 +120,7 @@ def test_lrp_simple_attributions_GammaRule(self): relevance = lrp.attribute(inputs) assertTensorAlmostEqual(self, relevance[0], torch.tensor([24.0, 36.0, 36.0])) - def test_lrp_simple_attributions_AlphaBeta(self): + def test_lrp_simple_attributions_AlphaBeta(self) -> None: model, inputs = _get_simple_model() with torch.no_grad(): model.linear.weight.data[0][0] = -2 @@ -131,7 +131,7 @@ def test_lrp_simple_attributions_AlphaBeta(self): relevance = lrp.attribute(inputs) 
assertTensorAlmostEqual(self, relevance[0], torch.tensor([24.0, 36.0, 36.0])) - def test_lrp_simple_attributions_all_layers(self): + def test_lrp_simple_attributions_all_layers(self) -> None: model, inputs = _get_simple_model(inplace=False) model.eval() model.linear.rule = EpsilonRule() @@ -142,7 +142,7 @@ def test_lrp_simple_attributions_all_layers(self): self.assertEqual(len(relevance), 2) assertTensorAlmostEqual(self, relevance[0][0], torch.tensor([18.0, 36.0, 54.0])) - def test_lrp_simple_attributions_all_layers_delta(self): + def test_lrp_simple_attributions_all_layers_delta(self) -> None: model, inputs = _get_simple_model(inplace=False) model.eval() model.linear.rule = EpsilonRule() diff --git a/tests/attr/models/test_base.py b/tests/attr/models/test_base.py index 4ebee39ee2..b8ebbc7763 100644 --- a/tests/attr/models/test_base.py +++ b/tests/attr/models/test_base.py @@ -16,7 +16,7 @@ class Test(unittest.TestCase): - def test_interpretable_embedding_base(self): + def test_interpretable_embedding_base(self) -> None: input1 = torch.tensor([2, 5, 0, 1]) input2 = torch.tensor([3, 0, 0, 2]) model = BasicEmbeddingModel() @@ -59,7 +59,7 @@ def test_interpretable_embedding_base(self): remove_interpretable_embedding_layer(model, interpretable_embedding1) self.assertTrue(model.embedding1.__class__ is Embedding) - def test_custom_module(self): + def test_custom_module(self) -> None: input1 = torch.tensor([[3, 2, 0], [1, 2, 4]]) input2 = torch.tensor([[0, 1, 0], [1, 2, 3]]) model = BasicEmbeddingModel() @@ -81,7 +81,7 @@ def test_custom_module(self): self.assertTrue(model.embedding2.__class__ is TextModule) self._assert_embeddings_equal(input2, output, interpretable_embedding) - def test_nested_multi_embeddings(self): + def test_nested_multi_embeddings(self) -> None: input1 = torch.tensor([[3, 2, 0], [1, 2, 4]]) input2 = torch.tensor([[0, 1, 0], [2, 6, 8]]) input3 = torch.tensor([[4, 1, 0], [2, 2, 8]]) diff --git a/tests/attr/models/test_pytext.py 
b/tests/attr/models/test_pytext.py index 57f7752865..0f6fdf672b 100644 --- a/tests/attr/models/test_pytext.py +++ b/tests/attr/models/test_pytext.py @@ -52,7 +52,7 @@ def setUp(self): self.model = self._create_dummy_model() self.data_handler = self._create_dummy_data_handler() - def tearDown(self): + def tearDown(self) -> None: for f in ( self.embedding_file, self.word_embedding_file, @@ -68,7 +68,7 @@ def tearDown(self): ): os.remove(p) - def test_word_embeddings(self): + def test_word_embeddings(self) -> None: embedding_list = configure_model_integ_grads_embeddings(self.model) integrated_gradients_embedding = embedding_list[0] input = torch.arange(0, 300).unsqueeze(0).unsqueeze(0) @@ -81,7 +81,7 @@ def test_word_embeddings(self): ) ) - def test_baseline_generation(self): + def test_baseline_generation(self) -> None: baseline_generator = BaselineGenerator(self.model, self.data_handler, "cpu") embedding_list = configure_model_integ_grads_embeddings(self.model) integrated_gradients_embedding = embedding_list[0] diff --git a/tests/attr/test_approximation_methods.py b/tests/attr/test_approximation_methods.py index 54a517b596..f068d56304 100644 --- a/tests/attr/test_approximation_methods.py +++ b/tests/attr/test_approximation_methods.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import unittest +from typing import List import torch from captum.attr._utils.approximation_methods import Riemann, riemann_builders @@ -8,16 +9,16 @@ class Test(unittest.TestCase): - def __init__(self, methodName="runTest") -> None: + def __init__(self, methodName: str = "runTest") -> None: super().__init__(methodName) - def test_riemann_0(self): + def test_riemann_0(self) -> None: with self.assertRaises(AssertionError): step_sizes, alphas = riemann_builders() step_sizes(0) alphas(0) - def test_riemann_2(self): + def test_riemann_2(self) -> None: expected_step_sizes_lrm = [0.5, 0.5] expected_step_sizes_trapezoid = [0.25, 0.25] expected_left = [0.0, 0.5] @@ -34,7 +35,7 @@ def test_riemann_2(self): 
expected_trapezoid, ) - def test_riemann_3(self): + def test_riemann_3(self) -> None: expected_step_sizes = [1 / 3] * 3 expected_step_sizes_trapezoid = [1 / 6, 1 / 3, 1 / 6] expected_left = [0.0, 1 / 3, 2 / 3] @@ -51,7 +52,7 @@ def test_riemann_3(self): expected_trapezoid, ) - def test_riemann_4(self): + def test_riemann_4(self) -> None: expected_step_sizes = [1 / 4] * 4 expected_step_sizes_trapezoid = [1 / 8, 1 / 4, 1 / 4, 1 / 8] expected_left = [0.0, 0.25, 0.5, 0.75] @@ -70,14 +71,14 @@ def test_riemann_4(self): def _assert_steps_and_alphas( self, - n, - expected_step_sizes, - expected_step_sizes_trapezoid, - expected_left, - expected_right, - expected_middle, - expected_trapezoid, - ): + n: int, + expected_step_sizes: List[float], + expected_step_sizes_trapezoid: List[float], + expected_left: List[float], + expected_right: List[float], + expected_middle: List[float], + expected_trapezoid: List[float], + ) -> None: step_sizes_left, alphas_left = riemann_builders(Riemann.left) step_sizes_right, alphas_right = riemann_builders(Riemann.right) step_sizes_middle, alphas_middle = riemann_builders(Riemann.middle) diff --git a/tests/attr/test_class_summarizer.py b/tests/attr/test_class_summarizer.py index 7009cca788..0d7517da2d 100644 --- a/tests/attr/test_class_summarizer.py +++ b/tests/attr/test_class_summarizer.py @@ -78,7 +78,7 @@ def create_batch_labels(batch_idx): ): self.class_test(data, classes, sizes) - def test_no_class(self): + def test_no_class(self) -> None: size = (30, 20) summarizer = ClassSummarizer(stats=CommonStats()) for _ in range(10): @@ -95,7 +95,7 @@ def test_no_class(self): self.assertIsInstance(summarizer.class_summaries, dict) self.assertEqual(len(summarizer.class_summaries), 0) - def test_single_label(self): + def test_single_label(self) -> None: size = (4, 3, 2, 1) data = torch.randn((100,) + size) diff --git a/tests/attr/test_stat.py b/tests/attr/test_stat.py index 9559b1b237..0489472972 100644 --- a/tests/attr/test_stat.py +++ 
b/tests/attr/test_stat.py @@ -15,7 +15,7 @@ def get_values(n=100, lo=None, hi=None, integers=False): class Test(BaseTest): - def test_div0(self): + def test_div0(self) -> None: summarizer = Summarizer([Var(), Mean()]) summ = summarizer.summary self.assertIsNone(summ) @@ -30,7 +30,7 @@ def test_div0(self): assertTensorAlmostEqual(self, summ["mean"], 10) assertTensorAlmostEqual(self, summ["variance"], 0) - def test_var_defin(self): + def test_var_defin(self) -> None: """ Variance is avg squared distance to mean. Thus it should be positive. This test is to ensure this is the case. @@ -63,7 +63,7 @@ def test_var_defin(self): assertTensorAlmostEqual(self, var, actual_var) self.assertTrue((var > 0).all()) - def test_multi_dim(self): + def test_multi_dim(self) -> None: x1 = torch.tensor([1.0, 2.0, 3.0, 4.0]) x2 = torch.tensor([2.0, 1.0, 2.0, 4.0]) x3 = torch.tensor([3.0, 3.0, 1.0, 4.0]) diff --git a/tests/attr/test_summarizer.py b/tests/attr/test_summarizer.py index 1b8d6859a2..67dc2e53e5 100644 --- a/tests/attr/test_summarizer.py +++ b/tests/attr/test_summarizer.py @@ -5,7 +5,7 @@ class Test(BaseTest): - def test_single_input(self): + def test_single_input(self) -> None: size = (2, 3) summarizer = Summarizer(stats=CommonStats()) for _ in range(10): @@ -19,7 +19,7 @@ def test_single_input(self): for k in summ: self.assertTrue(summ[k].size() == size) - def test_multi_input(self): + def test_multi_input(self) -> None: size1 = (10, 5, 5) size2 = (3, 5) diff --git a/tests/attr/test_utils_batching.py b/tests/attr/test_utils_batching.py index 89cd8b0407..30c99e1d8d 100644 --- a/tests/attr/test_utils_batching.py +++ b/tests/attr/test_utils_batching.py @@ -10,7 +10,7 @@ class Test(BaseTest): - def test_tuple_splice_range(self): + def test_tuple_splice_range(self) -> None: test_tuple = ( torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]]), "test", @@ -21,7 +21,7 @@ def test_tuple_splice_range(self): self.assertEqual(spliced_tuple[1], "test") assertTensorAlmostEqual(self, 
spliced_tuple[2], [[0, 1, 2], [3, 4, 5]]) - def test_tuple_splice_range_3d(self): + def test_tuple_splice_range_3d(self) -> None: test_tuple = ( torch.tensor([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [6, 7, 8]]]), "test", @@ -30,7 +30,7 @@ def test_tuple_splice_range_3d(self): assertTensorAlmostEqual(self, spliced_tuple[0], [[[6, 7, 8], [6, 7, 8]]]) self.assertEqual(spliced_tuple[1], "test") - def test_batched_generator(self): + def test_batched_generator(self) -> None: def sample_operator(inputs, additional_forward_args, target_ind, scale): return ( scale * (sum(inputs)), @@ -55,12 +55,12 @@ def sample_operator(inputs, additional_forward_args, target_ind, scale): self.assertEqual(add[1], 5) self.assertEqual(targ, 7) - def test_batched_operator_0_bsz(self): + def test_batched_operator_0_bsz(self) -> None: inp1 = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]]) with self.assertRaises(AssertionError): _batched_operator(lambda x: x, inputs=inp1, internal_batch_size=0) - def test_batched_operator(self): + def test_batched_operator(self) -> None: def _sample_operator(inputs, additional_forward_args, target_ind, scale): return ( scale * (sum(inputs)), diff --git a/tests/influence/_core/test_similarity_influence.py b/tests/influence/_core/test_similarity_influence.py index ec08bf6cf1..395762a5b2 100644 --- a/tests/influence/_core/test_similarity_influence.py +++ b/tests/influence/_core/test_similarity_influence.py @@ -36,7 +36,7 @@ def __init__(self, low, high, num_features) -> None: .transpose(1, 0) ) - def __len__(self): + def __len__(self) -> int: return len(self.samples) def __getitem__(self, idx): diff --git a/tests/influence/_utils/common.py b/tests/influence/_utils/common.py index 3ab018e5b9..999dc6404f 100644 --- a/tests/influence/_utils/common.py +++ b/tests/influence/_utils/common.py @@ -46,7 +46,7 @@ def __init__(self, samples, labels, use_gpu=False) -> None: ) self.labels = self.labels.cuda() - def __len__(self): + def __len__(self) -> int: return len(self.samples) 
def __getitem__(self, idx): @@ -64,7 +64,7 @@ def __init__(self, samples, labels, use_gpu=False) -> None: ) self.labels = self.labels.cuda() - def __len__(self): + def __len__(self) -> int: return len(self.samples[0]) def __getitem__(self, idx): @@ -254,7 +254,7 @@ def __init__( self.name = name if name else data_influence_class.__name__ self.kwargs = kwargs - def __repr__(self): + def __repr__(self) -> str: return self.name def __call__( diff --git a/tests/insights/test_contribution.py b/tests/insights/test_contribution.py index 56b5f26aaa..3b6f517419 100644 --- a/tests/insights/test_contribution.py +++ b/tests/insights/test_contribution.py @@ -26,7 +26,7 @@ def __init__( visualization_transform=None, ) - def visualization_type(self): + def visualization_type(self) -> str: return "real" def visualize(self, attribution, data, contribution_frac) -> FeatureOutput: @@ -135,7 +135,7 @@ def to_iter(data_loader): class Test(BaseTest): - def test_one_feature(self): + def test_one_feature(self) -> None: batch_size = 2 classes = _get_classes() dataset = list( @@ -169,7 +169,7 @@ def test_one_feature(self): total_contrib = sum(abs(f.contribution) for f in output[0].feature_outputs) self.assertAlmostEqual(total_contrib, 1.0, places=6) - def test_multi_features(self): + def test_multi_features(self) -> None: batch_size = 2 classes = _get_classes() img_dataset = list( diff --git a/tests/insights/test_features.py b/tests/insights/test_features.py index b89bab09ea..2f2e07cc06 100644 --- a/tests/insights/test_features.py +++ b/tests/insights/test_features.py @@ -16,11 +16,11 @@ class TestTextFeature(BaseTest): FEATURE_NAME = "question" - def test_text_feature_returns_text_as_visualization_type(self): + def test_text_feature_returns_text_as_visualization_type(self) -> None: feature = TextFeature(self.FEATURE_NAME, None, None, None) self.assertEqual(feature.visualization_type(), "text") - def test_text_feature_uses_visualization_transform_if_provided(self): + def 
test_text_feature_uses_visualization_transform_if_provided(self) -> None: input_data = torch.rand(2, 2) transformed_data = torch.rand(1, 1) @@ -55,7 +55,7 @@ def mock_transform(data): # has original data self.assertIs(feature_output.base, input_data) - def test_text_feature_generates_correct_visualization_output(self): + def test_text_feature_generates_correct_visualization_output(self) -> None: attribution = torch.tensor([0.1, 0.2, 0.3, 0.4]) input_data = torch.rand(1, 2) expected_modified = [100 * x for x in (attribution / attribution.max())] @@ -81,7 +81,7 @@ def test_text_feature_generates_correct_visualization_output(self): class TestEmptyFeature(BaseTest): - def test_empty_feature_should_generate_fixed_output(self): + def test_empty_feature_should_generate_fixed_output(self) -> None: feature = EmptyFeature() contribution = torch.rand(1).item() expected_output = FeatureOutput( @@ -96,7 +96,7 @@ def test_empty_feature_should_generate_fixed_output(self): class TestImageFeature(BaseTest): - def test_image_feature_generates_correct_ouput(self): + def test_image_feature_generates_correct_ouput(self) -> None: attribution = torch.zeros(1, 3, 4, 4) data = torch.ones(1, 3, 4, 4) contribution = 1.0 @@ -134,7 +134,7 @@ def mock_viz_attr(*args, **kwargs): class TestGeneralFeature(BaseTest): - def test_general_feature_generates_correct_output(self): + def test_general_feature_generates_correct_output(self) -> None: name = "general_feature" categories = ["cat1", "cat2", "cat3", "cat4"] attribution = torch.Tensor(1, 4) diff --git a/tests/utils/test_av.py b/tests/utils/test_av.py index 956bcd34de..301f04ecb9 100644 --- a/tests/utils/test_av.py +++ b/tests/utils/test_av.py @@ -20,7 +20,7 @@ def __init__(self, low, high, num_features) -> None: .transpose(1, 0) ) - def __len__(self): + def __len__(self) -> int: return len(self.samples) def __getitem__(self, idx): diff --git a/tests/utils/test_common.py b/tests/utils/test_common.py index e19c3c26b9..0a86c96e64 100644 --- 
a/tests/utils/test_common.py +++ b/tests/utils/test_common.py @@ -14,14 +14,14 @@ class Test(BaseTest): - def test_safe_div_number_denom(self): + def test_safe_div_number_denom(self) -> None: num = torch.tensor(4.0) assert safe_div(num, 2) == 2.0 assert safe_div(num, 0, 2) == 2.0 assert safe_div(num, 2.0) == 2.0 assert safe_div(num, 0.0, 2.0) == 2.0 - def test_safe_div_tensor_denom(self): + def test_safe_div_tensor_denom(self) -> None: num = torch.tensor([4.0, 6.0]) exp = torch.tensor([2.0, 3.0]) @@ -41,7 +41,7 @@ def test_safe_div_tensor_denom(self): # float default denom assert (safe_div(num, torch.tensor([0.0, 0.0]), 2.0) == exp).all() - def test_reduce_list_tensors(self): + def test_reduce_list_tensors(self) -> None: tensors = [torch.tensor([[3, 4, 5]]), torch.tensor([[0, 1, 2]])] reduced = _reduce_list(tensors) assertTensorAlmostEqual(self, reduced, [[3, 4, 5], [0, 1, 2]]) @@ -55,7 +55,7 @@ def test_reduce_list_tuples(self): assertTensorAlmostEqual(self, reduced[0], [[3, 4, 5], [3, 4, 5]]) assertTensorAlmostEqual(self, reduced[1], [[0, 1, 2], [0, 1, 2]]) - def test_sort_key_list(self): + def test_sort_key_list(self) -> None: key_list = [ torch.device("cuda:13"), torch.device("cuda:17"), @@ -67,7 +67,7 @@ def test_sort_key_list(self): for i in range(len(key_list)): self.assertEqual(sorted_keys[i].index, device_index_list[i]) - def test_sort_key_list_incomplete(self): + def test_sort_key_list_incomplete(self) -> None: key_list = [torch.device("cuda:10"), torch.device("cuda:0")] device_index_list = [0, 10, 13, 17] sorted_keys = _sort_key_list(key_list, device_index_list) diff --git a/tests/utils/test_linear_model.py b/tests/utils/test_linear_model.py index fcbc5e5272..ac38a77d29 100644 --- a/tests/utils/test_linear_model.py +++ b/tests/utils/test_linear_model.py @@ -121,7 +121,7 @@ def train_and_compare( h /= h.norm(p=2) assertTensorAlmostEqual(self, h, expected_hyperplane, delta=delta) - def test_simple_linear_regression(self): + def 
test_simple_linear_regression(self) -> None: xs = torch.randn(TestLinearModel.MAX_POINTS, 1) ys = 3 * xs + 1 @@ -152,7 +152,7 @@ def test_simple_linear_regression(self): delta=0.2, ) - def test_simple_multi_output(self): + def test_simple_multi_output(self) -> None: xs = torch.randn(TestLinearModel.MAX_POINTS, 1) y1 = 3 * xs + 1 y2 = -5 * xs @@ -167,7 +167,7 @@ def test_simple_multi_output(self): objective="ols", ) - def test_simple_linear_classification(self): + def test_simple_linear_classification(self) -> None: xs = torch.tensor([[0.5, 0.5], [-0.5, -0.5], [0.5, -0.5], [-0.5, 0.5]]) ys = torch.tensor([1.0, -1.0, 1.0, -1.0]) self.train_and_compare( @@ -201,7 +201,7 @@ def test_simple_linear_classification(self): SGDRidge, xs, ys, expected_loss=0.25, expected_reg=0, objective="ridge" ) - def test_simple_xor_problem(self): + def test_simple_xor_problem(self) -> None: r""" ^ o | x @@ -246,7 +246,7 @@ def test_simple_xor_problem(self): bias=False, ) - def test_weighted_problem(self): + def test_weighted_problem(self) -> None: r""" ^ 0 | x From 81858f3c37e0a09ab183028660485e53b5427255 Mon Sep 17 00:00:00 2001 From: Oliver Aobo Yang Date: Wed, 31 Aug 2022 16:29:55 -0700 Subject: [PATCH 122/174] add docstring style in developer guide (#1016) Summary: Write down the agreed docstring style in the developer guide Pull Request resolved: https://github.com/pytorch/captum/pull/1016 Reviewed By: NarineK Differential Revision: D39077183 Pulled By: aobo-y fbshipit-source-id: e3b5fab665e6697902d309082d8f4a31d8c52c82 --- docs/contribution_guide.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/contribution_guide.md b/docs/contribution_guide.md index 731e12bfc0..f8aacf1c85 100644 --- a/docs/contribution_guide.md +++ b/docs/contribution_guide.md @@ -77,3 +77,20 @@ https://captum.ai/tutorials/House_Prices_Regression_Interpret **Multimodal** - You can use VQA model and dataset described here: https://captum.ai/tutorials/Multimodal_VQA_Captum_Insights + + +## 
Docstring style + +Docstring is required for all public APIs to provide users the details of the arguments and returns. [Our API documentation](https://captum.ai/api/) is generated from the docstring. Captum adopts a customized docstring format modified on top of [Google style](https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html). Specifically, each argument should be listed as `arg_name (type): description` in the `Args:` section. The argument typing convention: +- primitive types: `int`, `str`, `float`, `bool` +- common collection types: `list`, `tuple`, `dict` + - [PEP 585](https://peps.python.org/pep-0585/#implementation) has deprecated the duplicate types: `List`, `Tuple`, `Dict` + - element types: `list[int]`, `dict[int, str]` +- other fundamental types: `Any`, `Callable`, `Iterable` +- class types: `MyClass`, `external_lib.SomeClass` +- omit `torch` for common PyTorch types: `Tensor`, `nn.Module` +- use `or` and `,` for union types: `type1 or type2`, `type1, type2, or type3` + - [PEP 604](https://peps.python.org/pep-0604/) proposes to use `|` to connect types: `type1 | type2`. We may consider migration later. +- append `optional` for argument with default value: `int, optional` + - append default value to the end of the description: `Default: None` + - Notice this is different from Python's type hint `Optional[...]`, which indicates whether the argument can be `None` From 5a5eb7842259bb3e2e8e64d6589d66a63711c35b Mon Sep 17 00:00:00 2001 From: Yassine EL KHAL Date: Tue, 6 Sep 2022 18:25:59 -0700 Subject: [PATCH 123/174] doc: rectify layerlrp example (#1017) Summary: This PR aims to rectify the LayerLRP example since it's talking about LayerLRP and not LRP. Moreover LRP doesn't require 2 arguments. 
Pull Request resolved: https://github.com/pytorch/captum/pull/1017 Reviewed By: NarineK Differential Revision: D39220576 Pulled By: aobo-y fbshipit-source-id: d3466f5193cea053049dbf28da4204cea350b4a5 --- captum/attr/_core/layer/layer_lrp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/attr/_core/layer/layer_lrp.py b/captum/attr/_core/layer/layer_lrp.py index e72bbbaddc..bdc328f47e 100644 --- a/captum/attr/_core/layer/layer_lrp.py +++ b/captum/attr/_core/layer/layer_lrp.py @@ -204,10 +204,10 @@ def attribute( >>> # and returns an Nx10 tensor of class probabilities. It has one >>> # Conv2D and a ReLU layer. >>> net = ImageClassifier() - >>> lrp = LRP(net, net.conv1) + >>> layer_lrp = LayerLRP(net, net.conv1) >>> input = torch.randn(3, 3, 32, 32) >>> # Attribution size matches input size: 3x3x32x32 - >>> attribution = lrp.attribute(input, target=5) + >>> attribution = layer_lrp.attribute(input, target=5) """ self.verbose = verbose From ff2b403965f99f7ff9eb64cdf9370c1044390080 Mon Sep 17 00:00:00 2001 From: Fulton Wang Date: Wed, 14 Sep 2022 17:02:22 -0700 Subject: [PATCH 124/174] expose `intermediate_quantities` in `TracInCPFastRandProj`, new Summary: - adds `intermediate_quantities` method for `TracInCPFastRandProj`, which accepts the same inputs as the public `self_influence` method - `_get_intermediate_quantities_tracincp_fast_rand_proj`, which is called by `intermediate_quantities`, now does outer iteration over checkpoints, instead of batches. - adds a new test file, `test_tracin_intermediate_quantities`, which checks that 1) calling `intermediate_quantities` on a single large batch vs a dataloader yielding the same examples as the large batch gives the same results 2) using the intermediate quantities from `TracInCPFastRandProj.intermediate_quantities` to manually compute influence scores gives the same results as directly using `TracInCPFast`, when the former does not use any random projections. 
- TODO: adding a `outer_loop_by_checkpoints` option to `intermediate_quantities`, and using `intermediate_quantities` in `TracInCPFastRandProj.__init__` to compute the intermediate quantities for the train dataset, also with a `outer_loop_by_checkpoints` option that defaults to False - TODO: adding progress bar for computation of intermediate quantities (previously implemented that in D34803544, but probably easier to create new diff on in this stack) Reviewed By: NarineK Differential Revision: D37968593 fbshipit-source-id: 0f1f5d48b5cf0d3c09826b8b24f978f819f3eb5d --- .../_core/tracincp_fast_rand_proj.py | 266 ++++++++++++++---- .../test_tracin_intermediate_quantities.py | 201 +++++++++++++ 2 files changed, 416 insertions(+), 51 deletions(-) create mode 100644 tests/influence/_core/test_tracin_intermediate_quantities.py diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index 7206812042..114d4c45bd 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -68,6 +68,15 @@ class TracInCPFast(TracInCPBase): computes influence scores for that special case. Note that the computed influence scores are exactly the same as when naive back-propagation is used - there is no loss in accuracy. + + In more detail regarding the influence score computation: let :math:`x` + and :math:`\nabla_y f(y)` be the input and output-gradient of the last + fully-connected layer, respectively, for a training example. Similarly, let + :math:`x'` and :math:`\nabla_{y'} f(y')` be the corresponding quantities for + a test example. Then, the influence score of the training example on the test + example is the sum of the contribution from each checkpoint. The contribution from + a given checkpoint is :math:`(x^T x')(\nabla_y f(y)^T \nabla_{y'} f(y'))`. 
+ """ def __init__( @@ -312,7 +321,9 @@ def _influence_batch_tracincp_fast( batch: Tuple[Any, ...], ): """ - computes influence scores for a single training batch + computes influence scores for a single training batch, when only considering + gradients in the last fully-connected layer, using the computation trick + described in the `TracInCPFast` class description. """ def get_checkpoint_contribution(checkpoint): @@ -333,8 +344,15 @@ def get_checkpoint_contribution(checkpoint): self, batch[0:-1], batch[-1] ) return ( - _tensor_batch_dot(input_jacobians, src_jacobian) + _tensor_batch_dot( + input_jacobians, src_jacobian + ) # shape is (test batch size, training batch size), containing + # (\nabla_y f(y)^T \nabla_{y'} f(y')) for every label y in the training + # batch and label y' in the test batch * _tensor_batch_dot(input_layer_inputs, src_layer_input) + # shape is (test batch size, training batch size), containing x^T x' + # for every example x in the training batch and example x' in the test + # batch * learning_rate ) @@ -707,6 +725,14 @@ def _basic_computation_tracincp_fast( that `model(*inputs)` produces the predictions for the batch. targets (tensor): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. + + Returns: + (input_jacobians, layer_inputs) (tuple): `input_jacobians` is a 2D tensor, + where each row is the jacobian of the loss, with respect to the + *output* of the last fully-connected layer. `layer_inputs` is a 2D + tensor, where each row is the *input* to the last fully-connected + layer. For both, the length is the number of examples in the batch + represented by `inputs` and `targets`. 
""" layer_inputs: Dict[device, Tuple[Tensor, ...]] = defaultdict() lock = threading.Lock() @@ -760,6 +786,57 @@ def _capture_inputs(layer, input, output) -> None: class TracInCPFastRandProj(TracInCPFast): + r""" + A version of TracInCPFast which is optimized for "interactive" calls to + `influence` for the purpose of calculating proponents / opponents, or + influence scores. "Interactive" means there will be multiple calls to + `influence`, with each call for a different batch of test examples, and + subsequent calls rely on the results of previous calls. The implementation in + this class has been optimized so that each call to `influence` is fast, so that + it can be used for interactive analysis. This class should only be used for + interactive use cases. It should not be used if `influence` will only be + called once, because to enable fast calls to `influence`, time and memory + intensive preprocessing is required in `__init__`. Furthermore, it should not + be used to calculate self influence scores - `TracInCPFast` should be used + instead for that purpose. To enable interactive analysis, this implementation + computes and saves "embedding" vectors for all training examples in + `train_dataset`. Crucially, the influence score of a training + example on a test example is simply the dot-product of their corresponding + vectors, and proponents / opponents can be found by first storing vectors for + training examples in a nearest-neighbor data structure, and then finding the + nearest-neighbors for a test example in terms of dot-product (see appendix F + of the TracIn paper). This class should only be used if calls to `influence` + to obtain proponents / opponents or influence scores will be made in an + "interactive" manner, and there is sufficient memory to store vectors for the + entire `train_dataset`. 
This is because in order to enable interactive + analysis, this implementation incurs overhead in `__init__` to set up the + nearest-neighbors data structure, which is both time and memory intensive, as + vectors corresponding to all training examples need to be stored. To reduce + memory usage, this implementation enables random projections of those vectors. + Note that the influence scores computed with random projections are less + accurate, though correct in expectation. + + In more detail regarding the "embedding" vectors - the influence of a training + example on a test example, when only considering gradients in the last + fully-connected layer, is the sum of the contribution from each checkpoint. The + contribution from a given checkpoint is + :math:`(x^T x')(\nabla_y f(y)^T \nabla_{y'} f(y'))`, using the notation in the + description of `TracInCPFast`. As is, this is not a dot-product of 2 vectors. + However, we can rewrite that contribution as + :math:`(x \nabla_y f(y)^T) \cdot (x' \nabla_{y'} f(y')^T)`. Both terms in this + product are 2D matrices, as they are outer products, and the "product" is actually + a dot-product, treating both matrices as vectors. Therefore, for a given + checkpoint, its contribution to the "embedding" of an example is just the + outer-product :math:`(x \nabla_y f(y)^T)`, flattened. Furthermore, to reduce the + dimension of this contribution, we can right-multiply and + left-multiply the outer-product with two separate projection matrices. These + transform :math:`\nabla_y f(y)` and :math:`x` to lower dimensional vectors. While + the dimensions of these two lower dimensional vectors do not necessarily need to + be the same, in our implementation, we let them be the same, both equal to the + square root of the desired projection dimension. Finally, the embedding of an + example is the concatenation of the contributions from each checkpoint. 
+ """ + def __init__( self, model: Module, @@ -775,35 +852,6 @@ def __init__( seed: int = 0, ) -> None: r""" - A version of TracInCPFast which is optimized for "interactive" calls to - `influence` for the purpose of calculating proponents / opponents, or - influence scores. "Interactive" means there will be multiple calls to - `influence`, with each call for a different batch of test examples, and - subsequent calls rely on the results of previous calls. The implementation in - this class has been optimized so that each call to `influence` is fast, so that - it can be used for interactive analysis. This class should only be used for - interactive use cases. It should not be used if `influence` will only be - called once, because to enable fast calls to `influence`, time and memory - intensive preprocessing is required in `__init__`. Furthermore, it should not - be used to calculate self influence scores - `TracInCPFast` should be used - instead for that purpose. To enable interactive analysis, this implementation - computes and saves "embedding" vectors for all training examples in - `train_dataset`. Crucially, the influence score of a training - example on a test example is simply the dot-product of their corresponding - vectors, and proponents / opponents can be found by first storing vectors for - training examples in a nearest-neighbor data structure, and then finding the - nearest-neighbors for a test example in terms of dot-product (see appendix F - of the TracIn paper). This class should only be used if calls to `influence` - to obtain proponents / opponents or influence scores will be made in an - "interactive" manner, and there is sufficient memory to store vectors for the - entire `train_dataset`. 
This is because in order to enable interactive - analysis, this implementation incures overhead in ``__init__` to setup the - nearest-neighbors data structure, which is both time and memory intensive, as - vectors corresponding to all training examples needed to be stored. To reduce - memory usage, this implementation enables random projections of those vectors. - Note that the influence scores computed with random projections are less - accurate, though correct in expectation. - Args: model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. @@ -963,6 +1011,8 @@ def _influence( # type: ignore[override] example, `influence_scores[i][j]` is the influence score for the j-th training example to the i-th input example. """ + # TODO: after D35721609 lands, use helper function + # `TracInCP._influence_rand_proj` here to avoid duplicated logic inputs_batch = (*inputs, targets) input_projections = self._get_intermediate_quantities_tracincp_fast_rand_proj( DataLoader( @@ -1323,7 +1373,7 @@ def _process_src_intermediate_quantities_tracincp_fast_rand_proj( def _get_intermediate_quantities_tracincp_fast_rand_proj( self, - dataloader: DataLoader, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], projection_quantities: Optional[Tuple[torch.Tensor, torch.Tensor]], ) -> torch.Tensor: r""" @@ -1335,14 +1385,23 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( specifically, largest dot-product) data structure. Args: - dataloader (DataLoader): DataLoader for which the intermediate quantities - are computed. + inputs_dataset (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. That is, + `model(*batch[0:-1])` produces the output for `model`, and + and `batch[-1]` are the labels, if any. 
Here, `model` is model + provided in initialization. This is the same assumption made for + each batch yielded by training dataset `train_dataset`. Please see + documentation for the `train_dataset` argument to + `TracInCPFastRandProj.__init__` for more details on the assumed + structure of a batch. projection_quantities (tuple or None): Is either the two tensors defining the randomized projections to apply, or None, which means no projection is to be applied. Returns: - checkpoint_projections (tensor): A tensor of dimension + intermediate_quantities (tensor): A tensor of dimension (N, D * C), where N is total number of examples in `dataloader`, C is the number of checkpoints passed as the `checkpoints` argument of `TracInCPFastRandProj.__init__`, and each row represents the @@ -1360,16 +1419,32 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( the variable d in the top of page 15 of the TracIn paper: https://arxiv.org/pdf/2002.08484.pdf. """ - # for each checkpoint, this stores a list of projections for a batch - # each element in this list will be of shape (batch_size, projection_dim) - checkpoint_projections: List[Any] = [[] for _ in self.checkpoints] + # if `inputs_dataset` is not a `DataLoader`, turn it into one. + inputs_dataset = _format_inputs_dataset(inputs_dataset) + # internally, whether `projection_quantities` is None determines whether + # any projection will be applied to reduce the dimension of the "embedding" + # vectors. If projection will be applied, there are actually 2 different + # projection matrices - one to project the `input_jacobians`, and one to + # project the `layer_inputs`. See below for details of those two quantities. + # here, we extract the corresponding projection matrices for those two + # quantities, if doing projection. Note that the same projections are used + # for each checkpoint. 
project = False if projection_quantities is not None: project = True jacobian_projection, layer_input_projection = projection_quantities + # for each checkpoint, we will populate a list containing the contribution of + # the checkpoint for each batch + checkpoint_contributions: List[Union[List, Tensor]] = [ + [] for _ in self.checkpoints + ] + + # the "embedding" vector is the concatenation of contributions from each + # checkpoint, which we compute one by one for (j, checkpoint) in enumerate(self.checkpoints): + assert ( checkpoint is not None ), "None returned from `checkpoints`, cannot load." @@ -1377,30 +1452,119 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( learning_rate = self.checkpoints_load_func(self.model, checkpoint) learning_rate_root = learning_rate**0.5 - for batch in dataloader: - - batch_jacobians, batch_layer_inputs = _basic_computation_tracincp_fast( + # after loading a checkpoint, we compute the contribution of that + # checkpoint, for *all* batches (instead of a single batch). this enables + # increased efficiency. + for batch in inputs_dataset: + + # compute `input_jacobians` and `layer_inputs`, for a given checkpoint + # using a helper function. `input_jacobians` is a 2D tensor, + # where each row is the jacobian of the loss, with respect to the + # *output* of the last fully-connected layer. `layer_inputs` is a 2D + # tensor, where each row is the *input* to the last fully-connected + # layer. 
For both, the length is the number of examples in `batch` + input_jacobians, layer_inputs = _basic_computation_tracincp_fast( self, batch[0:-1], batch[-1], ) + # if doing projection, project those two quantities if project: - batch_jacobians = torch.matmul(batch_jacobians, jacobian_projection) + input_jacobians = torch.matmul(input_jacobians, jacobian_projection) - batch_layer_inputs = torch.matmul( - batch_layer_inputs, layer_input_projection - ) + layer_inputs = torch.matmul(layer_inputs, layer_input_projection) - checkpoint_projections[j].append( + # for an example, the contribution to the "embedding" vector from each + # checkpoint is the outer product of its `input_jacobian` and its + # `layer_input`, flattened to a 1D tensor. here, we perform this + # for the entire batch. we append the contribution to a list containing + # the contribution of all batches, from the checkpoint. + cast(list, checkpoint_contributions[j]).append( torch.matmul( - torch.unsqueeze(batch_jacobians, 2), - torch.unsqueeze(batch_layer_inputs, 1), - ).flatten(start_dim=1) + torch.unsqueeze( + input_jacobians, 2 + ), # size is (batch_size, output_size, 1) + torch.unsqueeze( + layer_inputs, 1 + ), # size is (batch_size, 1, input_size) + ).flatten( + start_dim=1 + ) # matmul does a batched matrix multiplication to return a 3D + # tensor. each element along the batch (0-th) dimension is the + # matrix product of a (output_size, 1) and (1, input_size) tensor + # in other words, each element is an outer product, and the matmul + # is just doing a batched outer product. this is what we want, as + # the contribution to the "embedding" for an example is the outer + # product of the last layer's input and the gradient of its output. + # finally, we flatten the 3rd dimension so that the contribution to + # the embedding for this checkpoint is a 2D tensor, i.e. each + # example's contribution to the embedding is a 1D tensor. 
* learning_rate_root ) - checkpoint_projections[j] = torch.cat(checkpoint_projections[j], dim=0) + # once we have computed the contribution from each batch, for a given + # checkpoint, we concatenate them along the batch dimension to get a + # single 2D tensor for that checkpoint + checkpoint_contributions[j] = torch.cat( + checkpoint_contributions[j], dim=0 # type: ignore + ) + + # finally, we concatenate along the checkpoint dimension, to get a tensor of + # shape (batch_size, projection_dim * number of checkpoints) + # each row in this result is the "embedding" vector for an example in `batch` + return torch.cat(checkpoint_contributions, dim=1) # type: ignore + + def compute_intermediate_quantities( + self, + inputs_dataset: Union[Tuple[Any, ...], DataLoader], + ) -> Tensor: + """ + Computes "embedding" vectors for all examples in a single batch, or a + `Dataloader` that yields batches. These embedding vectors are constructed so + that the influence score of a training example on a test example is simply the + dot-product of their corresponding vectors. Please see the documentation for + `TracInCPFastRandProj.__init__` for more details. Allowing a `DataLoader` + yielding batches to be passed in (as opposed to a single batch) gives the + potential to improve efficiency, because we load each checkpoint only once in + this method call. Thus if a `DataLoader` yielding batches is passed in, this + reduces the total number of times each checkpoint is loaded for a dataset, + compared to if a single batch is passed in. The reason we do not just increase + the batch size is that for large models, large batches do not fit in memory. + + Args: + inputs_dataset (Tuple, or DataLoader): Either a single tuple of any, or a + `DataLoader`, where each batch yielded is a tuple of any. In + either case, the tuple represents a single batch, where the last + element is assumed to be the labels for the batch. 
That is, + `model(*batch[0:-1])` produces the output for `model`, and + and `batch[-1]` are the labels, if any. Here, `model` is model + provided in initialization. This is the same assumption made for + each batch yielded by training dataset `train_dataset`. Please see + documentation for the `train_dataset` argument to + `TracInCPFastRandProj.__init__` for more details on the assumed + structure of a batch. - return torch.cat(checkpoint_projections, dim=1) + Returns: + intermediate_quantities (tensor): A tensor of dimension + (N, D * C), where N is total number of examples in + `inputs_dataset`, C is the number of checkpoints passed as the + `checkpoints` argument of `TracInCPFastRandProj.__init__`, and each + row represents the vector for an example. Regarding D: Let I be the + dimension of the output of the last fully-connected layer times the + dimension of the input of the last fully-connected layer. If + `self.projection_dim` is specified in initialization, + D = min(I * C, `self.projection_dim` * C). Otherwise, D = I * C. + In summary, if `self.projection_dim` is None, the dimension of each + vector will be determined by the size of the input and output of + the last fully-connected layer of `model`. Otherwise, + `self.projection_dim` must be an int, and random projection will be + performed to ensure that the vector is of dimension no more than + `self.projection_dim` * C. `self.projection_dim` corresponds to + the variable d in the top of page 15 of the TracIn paper: + https://arxiv.org/pdf/2002.08484.pdf. 
+ """ + return self._get_intermediate_quantities_tracincp_fast_rand_proj( + inputs_dataset, self.projection_quantities + ) diff --git a/tests/influence/_core/test_tracin_intermediate_quantities.py b/tests/influence/_core/test_tracin_intermediate_quantities.py new file mode 100644 index 0000000000..7f3e806c28 --- /dev/null +++ b/tests/influence/_core/test_tracin_intermediate_quantities.py @@ -0,0 +1,201 @@ +import tempfile +from typing import Callable + +import torch + +import torch.nn as nn +from captum.influence._core.tracincp_fast_rand_proj import ( + TracInCPFast, + TracInCPFastRandProj, +) +from parameterized import parameterized +from tests.helpers.basic import assertTensorAlmostEqual, BaseTest +from tests.influence._utils.common import ( + build_test_name_func, + DataInfluenceConstructor, + get_random_model_and_data, +) +from torch.utils.data import DataLoader + + +class TestTracInIntermediateQuantities(BaseTest): + @parameterized.expand( + [ + (reduction, constructor, unpack_inputs) + for unpack_inputs in [True, False] + for (reduction, constructor) in [ + ("sum", DataInfluenceConstructor(TracInCPFastRandProj)), + ] + ], + name_func=build_test_name_func(), + ) + def test_tracin_intermediate_quantities_api( + self, reduction: str, tracin_constructor: Callable, unpack_inputs: bool + ) -> None: + """ + tests that the result of calling the public method + `compute_intermediate_quantities` for a DataLoader of batches is the same as + when the batches are collated into a single batch + """ + with tempfile.TemporaryDirectory() as tmpdir: + (net, train_dataset,) = get_random_model_and_data( + tmpdir, + unpack_inputs, + return_test_data=False, + ) + + # create a single batch representing the entire dataset + single_batch = next( + iter(DataLoader(train_dataset, batch_size=len(train_dataset))) + ) + + # create a dataloader that yields batches from the dataset + dataloader = DataLoader(train_dataset, batch_size=5) + + # create tracin instance + criterion = 
nn.MSELoss(reduction=reduction) + batch_size = 5 + tracin = tracin_constructor( + net, + train_dataset, + tmpdir, + batch_size, + criterion, + ) + + # compute intermediate quantities using `compute_intermediate_quantities` + # when passing in a single batch + single_batch_intermediate_quantities = ( + tracin.compute_intermediate_quantities(single_batch) + ) + + # compute intermediate quantities using `compute_intermediate_quantities` + # when passing in a dataloader with the same examples + dataloader_intermediate_quantities = tracin.compute_intermediate_quantities( + dataloader, + ) + + # the two self influences should be equal + assertTensorAlmostEqual( + self, + single_batch_intermediate_quantities, + dataloader_intermediate_quantities, + delta=0.01, # due to numerical issues, we can't set this to 0.0 + mode="max", + ) + + @parameterized.expand( + [ + ( + reduction, + constructor, + intermediate_quantities_tracin_constructor, + unpack_inputs, + ) + for unpack_inputs in [True, False] + for ( + reduction, + constructor, + intermediate_quantities_tracin_constructor, + ) in [ + ( + "sum", + DataInfluenceConstructor(TracInCPFast), + DataInfluenceConstructor(TracInCPFastRandProj), + ), + ] + ], + name_func=build_test_name_func(), + ) + def test_tracin_intermediate_quantities_consistent( + self, + reduction: str, + tracin_constructor: Callable, + intermediate_quantities_tracin_constructor: Callable, + unpack_inputs: bool, + ) -> None: + """ + Since the influence score of a test batch on a training data should be the dot + product of their intermediate quantities, checks that this is the case, by + computing the influence score 2 different ways and checking they give the same + results: 1) with the `influence` method, and by using the + `compute_intermediate_quantities` method on the test and training data, and + taking the dot product. No projection should be done. Otherwise, the + projection will cause error. 
For 1), we use an implementation that does not use + intermediate quantities, i.e. `TracInCPFast`. For 2), we use a method that + does use intermediate quantities, i.e. `TracInCPFastRandProj`. Since the + methods for the 2 cases are different, we need to parametrize the test with 2 + different tracin constructors. `tracin_constructor` is the constructor for the + tracin implementation for case 1. `intermediate_quantities_tracin_constructor` + is the constructor for the tracin implementation for case 2. + """ + with tempfile.TemporaryDirectory() as tmpdir: + ( + net, + train_dataset, + test_features, + test_labels, + ) = get_random_model_and_data(tmpdir, unpack_inputs, return_test_data=True) + + # create a dataloader that yields batches from the dataset + train_dataset = DataLoader(train_dataset, batch_size=5) + + # create tracin instance + criterion = nn.MSELoss(reduction=reduction) + batch_size = 5 + + tracin = tracin_constructor( + net, + train_dataset, + tmpdir, + batch_size, + criterion, + ) + + # create tracin instance which exposes `intermediate_quantities` + intermediate_quantities_tracin = intermediate_quantities_tracin_constructor( + net, + train_dataset, + tmpdir, + batch_size, + criterion, + ) + + # compute influence scores without using `compute_intermediate_quantities` + scores = tracin.influence( + test_features, test_labels, unpack_inputs=unpack_inputs + ) + + # compute influence scores using `compute_intermediate_quantities` + # we combine `test_features` and `test_labels` into a single tuple + # `test_batch` to pass to the model, with the assumption that + # `model(test_batch[0:-1]` produces the predictions, and `test_batch[-1]` + # are the labels. We do this due to the assumptions made by the + # `compute_intermediate_quantities` method. 
Therefore, how we + # form `test_batch` depends on whether `unpack_inputs` is True or False + if not unpack_inputs: + # `test_features` is a Tensor + test_batch = (test_features, test_labels) + else: + # `test_features` is a tuple, so we unpack it to place in tuple, + # along with `test_labels` + test_batch = (*test_features, test_labels) + + # the influence score is the dot product of intermediate quantities + intermediate_quantities_scores = torch.matmul( + intermediate_quantities_tracin.compute_intermediate_quantities( + test_batch + ), + intermediate_quantities_tracin.compute_intermediate_quantities( + train_dataset + ).T, + ) + + # the scores computed using the two methods should be the same + assertTensorAlmostEqual( + self, + scores, + intermediate_quantities_scores, + delta=0.01, # due to numerical issues, we can't set this to 0.0 + mode="max", + ) From b8eff98aaf0b17ff4d57a339cec5d3fba250e006 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 19 Sep 2022 12:28:30 -0700 Subject: [PATCH 125/174] Fix multiple Sphinx warnings & docstrings (#985) Summary: The warning messages take up a lot of space on the console log, and it was really simple to resolve them. The common.rst file was also incorrectly pointing to the wrong path and some of the functions were renamed since it was created, so no docs were being generated for that page. `InputBaselineXGradient` was also removed from the public rst api docs, as it's not supposed to be public. In addition to these easy doc warning fixes, I found a module that was listed on the API docs site, but there's no public path to use it and no tests were ever written for it. I made an issue post for it here: https://github.com/pytorch/captum/issues/989 I fixed some docstring issues like lack of consistent uppercase for `any` and `callable`, spacing, random type / case mistakes I came across, etc... I also fixed some paths. Issues with upgrading to later versions of Sphinx were also resolved. 
--- Currently Sphinx gives the following warnings / errors for the master branch: ``` /content/captum/sphinx/source/base_classes.rst:2: WARNING: Title underline too short. Base Classes ========== /content/captum/sphinx/source/base_classes.rst:29: WARNING: Title underline too short. Perturbation Attribution ^^^^^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/base_classes.rst:29: WARNING: Title underline too short. Perturbation Attribution ^^^^^^^^^^^^^^^^^^^^^ WARNING: autodoc: failed to import function 'validate_input' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute 'validate_input' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: validate_input WARNING: autodoc: failed to import function 'validate_noise_tunnel_type' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute 'validate_noise_tunnel_type' The above 
exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: validate_noise_tunnel_type WARNING: autodoc: failed to import function 'format_input' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute 'format_input' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: format_input WARNING: autodoc: failed to import function '_format_attributions' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call 
last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute '_format_attributions' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: _format_attributions WARNING: autodoc: failed to import function 'zeros' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute 'zeros' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise 
AttributeError(name) from exc AttributeError: zeros WARNING: autodoc: failed to import function '_run_forward' from module 'captum.attr._utils.common'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr._utils.common' has no attribute '_run_forward' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: _run_forward /content/captum/sphinx/source/concept.rst:2: WARNING: Title underline too short. Concept-based Interpretability ====== /content/captum/sphinx/source/concept.rst:12: WARNING: Title underline too short. ConceptInterpreter ^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/concept.rst:12: WARNING: Title underline too short. ConceptInterpreter ^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/deconvolution.rst:2: WARNING: Title underline too short. Deconvolution ========= /content/captum/captum/attr/_core/deep_lift.py:docstring of captum.attr._core.deep_lift.DeepLiftShap:12: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/sphinx/source/feature_ablation.rst:2: WARNING: Title underline too short. 
Feature Ablation ========= /content/captum/captum/attr/_core/feature_ablation.py:docstring of captum.attr._core.feature_ablation.FeatureAblation.attribute:36: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/sphinx/source/feature_permutation.rst:2: WARNING: Title underline too short. Feature Permutation ========= WARNING: autodoc: failed to import class 'InputBaselineXGradient' from module 'captum.attr'; the following exception was raised: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 448, in safe_getattr return getattr(obj, name, *defargs) AttributeError: module 'captum.attr' has no attribute 'InputBaselineXGradient' The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/importer.py", line 110, in import_object obj = attrgetter(obj, mangled_name) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 332, in get_attr return autodoc_attrgetter(self.env.app, obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/ext/autodoc/__init__.py", line 2780, in autodoc_attrgetter return safe_getattr(obj, name, *defargs) File "/usr/local/lib/python3.7/dist-packages/sphinx/util/inspect.py", line 464, in safe_getattr raise AttributeError(name) from exc AttributeError: InputBaselineXGradient /content/captum/sphinx/source/guided_backprop.rst:2: WARNING: Title underline too short. Guided Backprop ========= /content/captum/sphinx/source/guided_grad_cam.rst:2: WARNING: Title underline too short. Guided GradCAM ========= /content/captum/sphinx/source/influence.rst:2: WARNING: Title underline too short. Influential Examples ====== /content/captum/sphinx/source/influence.rst:12: WARNING: Title underline too short. 
SimilarityInfluence ^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/influence.rst:12: WARNING: Title underline too short. SimilarityInfluence ^^^^^^^^^^^^^^^^^ /content/captum/captum/influence/_core/tracincp.py:docstring of captum.influence._core.tracincp.TracInCPBase.influence:61: WARNING: Inline interpreted text or phrase reference start-string without end-string. /content/captum/captum/influence/_core/tracincp.py:docstring of captum.influence._core.tracincp.TracInCP.influence:61: WARNING: Inline interpreted text or phrase reference start-string without end-string. /content/captum/captum/influence/_core/tracincp_fast_rand_proj.py:docstring of captum.influence._core.tracincp_fast_rand_proj.TracInCPFast.influence:62: WARNING: Inline interpreted text or phrase reference start-string without end-string. /content/captum/sphinx/source/influence.rst:38: WARNING: Title underline too short. TracInCPFastRandProj ^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/influence.rst:38: WARNING: Title underline too short. TracInCPFastRandProj ^^^^^^^^^^^^^^^^ /content/captum/captum/influence/_core/tracincp_fast_rand_proj.py:docstring of captum.influence._core.tracincp_fast_rand_proj.TracInCPFastRandProj:1: WARNING: Inline literal start-string without end-string. /content/captum/sphinx/source/input_x_gradient.rst:2: WARNING: Title underline too short. 
Input X Gradient =============== WARNING: autodoc: failed to import class 'api.Batch' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.api' WARNING: autodoc: failed to import class 'api.AttributionVisualizer' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.api' WARNING: autodoc: failed to import class 'features.BaseFeature' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.features' WARNING: autodoc: failed to import class 'features.GeneralFeature' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.features' WARNING: autodoc: failed to import class 'features.TextFeature' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.features' WARNING: autodoc: failed to import class 'features.ImageFeature' from module 'captum.insights'; the following exception was raised: No module named 'captum.insights.features' /content/captum/captum/attr/_core/integrated_gradients.py:docstring of captum.attr._core.integrated_gradients.IntegratedGradients.attribute:43: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/kernel_shap.py:docstring of captum.attr._core.kernel_shap.KernelShap.attribute:66: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/kernel_shap.py:docstring of captum.attr._core.kernel_shap.KernelShap.kernel_shap_perturb_generator:4: WARNING: Block quote ends without a blank line; unexpected unindent. /content/captum/sphinx/source/layer.rst:2: WARNING: Title underline too short. Layer Attribution ====== /content/captum/captum/attr/_core/layer/layer_conductance.py:docstring of captum.attr._core.layer.layer_conductance.LayerConductance.attribute:35: WARNING: Bullet list ends without a blank line; unexpected unindent. 
/content/captum/sphinx/source/layer.rst:18: WARNING: Title underline too short. Internal Influence ^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/layer.rst:18: WARNING: Title underline too short. Internal Influence ^^^^^^^^^^^^^^^^^ /content/captum/captum/attr/_core/layer/internal_influence.py:docstring of captum.attr._core.layer.internal_influence.InternalInfluence.attribute:209: WARNING: Inline interpreted text or phrase reference start-string without end-string. /content/captum/sphinx/source/layer.rst:24: WARNING: Title underline too short. Layer Gradient X Activation ^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/layer.rst:24: WARNING: Title underline too short. Layer Gradient X Activation ^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/captum/attr/_core/layer/layer_deep_lift.py:docstring of captum.attr._core.layer.layer_deep_lift.LayerDeepLift.attribute:39: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/layer/layer_deep_lift.py:docstring of captum.attr._core.layer.layer_deep_lift.LayerDeepLiftShap:16: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/sphinx/source/layer.rst:54: WARNING: Title underline too short. Layer Integrated Gradients ^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/layer.rst:54: WARNING: Title underline too short. Layer Integrated Gradients ^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/captum/attr/_core/layer/layer_integrated_gradients.py:docstring of captum.attr._core.layer.layer_integrated_gradients.LayerIntegratedGradients.attribute:35: WARNING: Unexpected indentation. /content/captum/captum/attr/_core/layer/layer_integrated_gradients.py:docstring of captum.attr._core.layer.layer_integrated_gradients.LayerIntegratedGradients.attribute:140: WARNING: Unexpected indentation. 
/content/captum/captum/attr/_core/layer/layer_integrated_gradients.py:docstring of captum.attr._core.layer.layer_integrated_gradients.LayerIntegratedGradients.attribute:158: WARNING: Block quote ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/layer/layer_lrp.py:docstring of captum.attr._core.layer.layer_lrp.LayerLRP.attribute:79: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/layer/layer_lrp.py:docstring of captum.attr._core.layer.layer_lrp.LayerLRP.attribute:93: WARNING: Block quote ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/lime.py:docstring of captum.attr._core.lime.LimeBase.attribute:111: WARNING: Inline strong start-string without end-string. /content/captum/captum/attr/_core/lime.py:docstring of captum.attr._core.lime.Lime.attribute:66: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/lrp.py:docstring of captum.attr._core.lrp.LRP.attribute:68: WARNING: Unexpected indentation. /content/captum/captum/attr/_core/lrp.py:docstring of captum.attr._core.lrp.LRP.attribute:80: WARNING: Block quote ends without a blank line; unexpected unindent. /content/captum/sphinx/source/metrics.rst:2: WARNING: Title underline too short. Metrics ====== /content/captum/captum/metrics/_core/infidelity.py:docstring of captum.metrics._core.infidelity.infidelity:83: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/captum/metrics/_core/sensitivity.py:docstring of captum.metrics._core.sensitivity.sensitivity_max:112: WARNING: Inline strong start-string without end-string. /content/captum/sphinx/source/neuron.rst:2: WARNING: Title underline too short. Neuron Attribution ======= /content/captum/sphinx/source/neuron.rst:11: WARNING: Title underline too short. 
Neuron Integrated Gradients ^^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/neuron.rst:11: WARNING: Title underline too short. Neuron Integrated Gradients ^^^^^^^^^^^^^^^^^^^^^^^^^^ /content/captum/captum/attr/_core/neuron/neuron_deep_lift.py:docstring of captum.attr._core.neuron.neuron_deep_lift.NeuronDeepLiftShap:16: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/neuron/neuron_feature_ablation.py:docstring of captum.attr._core.neuron.neuron_feature_ablation.NeuronFeatureAblation.attribute:69: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/noise_tunnel.py:docstring of captum.attr._core.noise_tunnel.NoiseTunnel:18: WARNING: Definition list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/occlusion.py:docstring of captum.attr._core.occlusion.Occlusion.attribute:68: WARNING: Bullet list ends without a blank line; unexpected unindent. WARNING: autodoc: failed to import module 'pytext' from module 'captum.attr._models'; the following exception was raised: No module named 'pytext' WARNING: don't know which module to import for autodocumenting 'PyTextInterpretableEmbedding' (try placing a "module" or "currentmodule" directive in the document, or giving an explicit module name) WARNING: don't know which module to import for autodocumenting 'BaselineGenerator' (try placing a "module" or "currentmodule" directive in the document, or giving an explicit module name) /content/captum/sphinx/source/robust.rst:2: WARNING: Title underline too short. Robustness ====== /content/captum/sphinx/source/robust.rst:26: WARNING: Title underline too short. Min Param Perturbation ^^^^^^^^^^^^^^^^ /content/captum/sphinx/source/robust.rst:26: WARNING: Title underline too short. 
Min Param Perturbation ^^^^^^^^^^^^^^^^ /content/captum/captum/robust/_core/metrics/min_param_perturbation.py:docstring of captum.robust._core.metrics.min_param_perturbation.MinParamPerturbation:75: WARNING: Inline strong start-string without end-string. /content/captum/sphinx/source/shapley_value_sampling.rst:2: WARNING: Title underline too short. Shapley Value Sampling ========= /content/captum/captum/attr/_core/shapley_value.py:docstring of captum.attr._core.shapley_value.ShapleyValueSampling.attribute:42: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_core/shapley_value.py:docstring of captum.attr._core.shapley_value.ShapleyValues.attribute:42: WARNING: Bullet list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_utils/visualization.py:docstring of captum.attr._utils.visualization.visualize_image_attr:37: WARNING: Enumerated list ends without a blank line; unexpected unindent. /content/captum/captum/attr/_utils/visualization.py:docstring of captum.attr._utils.visualization.visualize_image_attr:54: WARNING: Enumerated list ends without a blank line; unexpected unindent. looking for now-outdated files... none found pickling environment... done checking consistency... /content/captum/sphinx/source/approximation_methods.rst: WARNING: document isn't included in any toctree /content/captum/sphinx/source/common.rst: WARNING: document isn't included in any toctree /content/captum/sphinx/source/pytext.rst: WARNING: document isn't included in any toctree done ``` With the changes in this PR, Sphinx now only gives the following warnings instead of the above multi-page list: ``` /content/captum/captum/attr/_core/layer/layer_integrated_gradients.py:docstring of captum.attr._core.layer.layer_integrated_gradients.LayerIntegratedGradients.attribute:147: WARNING: Bullet list ends without a blank line; unexpected unindent. 
/content/captum/captum/attr/_core/lime.py:docstring of captum.attr._core.lime.LimeBase.attribute:111: WARNING: Inline strong start-string without end-string. /content/captum/captum/metrics/_core/sensitivity.py:docstring of captum.metrics._core.sensitivity.sensitivity_max:112: WARNING: Inline strong start-string without end-string. WARNING: autodoc: failed to import class 'pytext.PyTextInterpretableEmbedding' from module 'captum.attr._models'; the following exception was raised: No module named 'pytext' WARNING: autodoc: failed to import class 'pytext.BaselineGenerator' from module 'captum.attr._models'; the following exception was raised: No module named 'pytext' /content/captum/captum/robust/_core/metrics/min_param_perturbation.py:docstring of captum.robust._core.metrics.min_param_perturbation.MinParamPerturbation:77: WARNING: Inline strong start-string without end-string. looking for now-outdated files... none found pickling environment... done checking consistency... /content/captum/sphinx/source/pytext.rst: WARNING: document isn't included in any toctree done ``` Pull Request resolved: https://github.com/pytorch/captum/pull/985 Reviewed By: vivekmig Differential Revision: D39335917 Pulled By: aobo-y fbshipit-source-id: a5f96cc78cdbc1365ba683df32c7f8b6162197d4 --- README.md | 21 +++-- captum/_utils/av.py | 22 ++--- captum/_utils/gradient.py | 4 +- captum/_utils/models/linear_model/model.py | 8 +- captum/attr/_core/deep_lift.py | 38 +++++---- captum/attr/_core/feature_ablation.py | 23 ++--- captum/attr/_core/feature_permutation.py | 34 ++++---- captum/attr/_core/gradient_shap.py | 24 +++--- .../attr/_core/guided_backprop_deconvnet.py | 26 +++--- captum/attr/_core/guided_grad_cam.py | 16 ++-- captum/attr/_core/input_x_gradient.py | 12 +-- captum/attr/_core/integrated_gradients.py | 19 +++-- captum/attr/_core/kernel_shap.py | 23 ++--- captum/attr/_core/layer/grad_cam.py | 16 ++-- captum/attr/_core/layer/internal_influence.py | 22 ++--- 
captum/attr/_core/layer/layer_activation.py | 14 +-- captum/attr/_core/layer/layer_conductance.py | 23 ++--- captum/attr/_core/layer/layer_deep_lift.py | 39 +++++---- .../_core/layer/layer_feature_ablation.py | 18 ++-- .../attr/_core/layer/layer_gradient_shap.py | 28 +++--- .../layer/layer_gradient_x_activation.py | 16 ++-- .../_core/layer/layer_integrated_gradients.py | 43 ++++++---- captum/attr/_core/layer/layer_lrp.py | 35 ++++---- captum/attr/_core/lime.py | 47 +++++----- captum/attr/_core/lrp.py | 38 +++++---- .../attr/_core/neuron/neuron_conductance.py | 20 ++--- captum/attr/_core/neuron/neuron_deep_lift.py | 36 ++++---- .../_core/neuron/neuron_feature_ablation.py | 22 ++--- captum/attr/_core/neuron/neuron_gradient.py | 16 ++-- .../attr/_core/neuron/neuron_gradient_shap.py | 26 +++--- .../neuron_guided_backprop_deconvnet.py | 32 +++---- .../neuron/neuron_integrated_gradients.py | 20 ++--- captum/attr/_core/noise_tunnel.py | 24 +++--- captum/attr/_core/occlusion.py | 21 ++--- captum/attr/_core/saliency.py | 16 ++-- captum/attr/_core/shapley_value.py | 36 ++++---- captum/attr/_models/base.py | 29 ++++--- captum/attr/_utils/approximation_methods.py | 17 ++-- captum/attr/_utils/attribution.py | 69 +++++++-------- captum/attr/_utils/class_summarizer.py | 4 +- captum/attr/_utils/summarizer.py | 4 +- captum/attr/_utils/visualization.py | 52 ++++++------ captum/concept/_core/cav.py | 6 +- captum/concept/_core/concept.py | 5 +- captum/concept/_core/tcav.py | 34 +++++--- captum/concept/_utils/classifier.py | 6 +- captum/concept/_utils/data_iterator.py | 2 +- .../influence/_core/similarity_influence.py | 6 +- captum/influence/_core/tracincp.py | 69 ++++++++------- .../_core/tracincp_fast_rand_proj.py | 85 +++++++++++-------- captum/influence/_utils/common.py | 17 ++-- captum/influence/_utils/nearest_neighbors.py | 8 +- captum/insights/__init__.py | 2 +- captum/insights/attr_vis/app.py | 16 ++-- captum/insights/attr_vis/features.py | 18 ++-- 
captum/metrics/_core/infidelity.py | 29 ++++--- captum/metrics/_core/sensitivity.py | 32 +++---- captum/metrics/_utils/batching.py | 6 +- captum/robust/_core/fgsm.py | 44 ++++++---- .../robust/_core/metrics/attack_comparator.py | 59 +++++++------ .../_core/metrics/min_param_perturbation.py | 32 +++---- captum/robust/_core/perturbation.py | 6 +- captum/robust/_core/pgd.py | 29 ++++--- ...lgorithms.md => attribution_algorithms.md} | 2 +- docs/contribution_guide.md | 4 +- docs/extension/integrated_gradients.md | 10 +-- docs/faq.md | 4 +- scripts/install_via_pip.sh | 4 +- sphinx/source/approximation_methods.rst | 2 +- sphinx/source/base_classes.rst | 12 +-- sphinx/source/common.rst | 12 --- sphinx/source/concept.rst | 10 +-- sphinx/source/conf.py | 50 +++++++++++ sphinx/source/deconvolution.rst | 2 +- sphinx/source/feature_ablation.rst | 3 +- sphinx/source/feature_permutation.rst | 3 +- sphinx/source/gradient_shap.rst | 3 - sphinx/source/guided_backprop.rst | 2 +- sphinx/source/guided_grad_cam.rst | 2 +- sphinx/source/influence.rst | 14 +-- sphinx/source/input_x_gradient.rst | 2 +- sphinx/source/insights.rst | 4 +- sphinx/source/kernel_shap.rst | 1 + sphinx/source/layer.rst | 24 +++--- sphinx/source/lime.rst | 1 + sphinx/source/metrics.rst | 6 +- sphinx/source/neuron.rst | 21 ++--- sphinx/source/noise_tunnel.rst | 1 + sphinx/source/occlusion.rst | 1 + sphinx/source/pytext.rst | 7 +- sphinx/source/robust.rst | 10 +-- sphinx/source/shapley_value_sampling.rst | 4 +- sphinx/source/utilities.rst | 3 + .../test_tracin_intermediate_quantities.py | 2 +- website/sidebars.json | 2 +- 95 files changed, 967 insertions(+), 825 deletions(-) rename docs/{algorithms.md => attribution_algorithms.md} (99%) delete mode 100644 sphinx/source/common.rst diff --git a/README.md b/README.md index 5f415f7e0a..afa2f99d3f 100644 --- a/README.md +++ b/README.md @@ -159,8 +159,7 @@ model.eval() Next, we need to define simple input and baseline tensors. 
Baselines belong to the input space and often carry no predictive signal. Zero tensor can serve as a baseline for many tasks. -Some interpretability algorithms such as `Integrated -Gradients`, `Deeplift` and `GradientShap` are designed to attribute the change +Some interpretability algorithms such as `IntegratedGradients`, `DeepLift` and `GradientShap` are designed to attribute the change between the input and baseline to a predictive class or a value that the neural network outputs. @@ -472,23 +471,23 @@ You can watch the recorded talk [here](https://www.youtube.com/watch?v=ayhBHZYje * `SmoothGrad`: [SmoothGrad: removing noise by adding noise, Daniel Smilkov et al. 2017](https://arxiv.org/abs/1706.03825) * `NoiseTunnel`: [Sanity Checks for Saliency Maps, Julius Adebayo et al. 2018](https://arxiv.org/abs/1810.03292) * `NeuronConductance`: [How Important is a neuron?, Kedar Dhamdhere et al. 2018](https://arxiv.org/abs/1805.12233) -* `LayerConductance`: [Computationally Efficient Measures of Internal Neuron Importance, Avanti Shrikumar et al. 2018](https://arxiv.org/pdf/1807.09946.pdf) -* `DeepLift`, `NeuronDeepLift`, `LayerDeepLift`: [Learning Important Features Through Propagating Activation Differences, Avanti Shrikumar et al. 2017](https://arxiv.org/pdf/1704.02685.pdf) and [Towards better understanding of gradient-based attribution methods for deep neural networks, Marco Ancona et al. 2018](https://openreview.net/pdf?id=Sy21R9JAW) -* `NeuronIntegratedGradients`: [Computationally Efficient Measures of Internal Neuron Importance, Avanti Shrikumar et al. 2018](https://arxiv.org/pdf/1807.09946.pdf) +* `LayerConductance`: [Computationally Efficient Measures of Internal Neuron Importance, Avanti Shrikumar et al. 2018](https://arxiv.org/abs/1807.09946) +* `DeepLift`, `NeuronDeepLift`, `LayerDeepLift`: [Learning Important Features Through Propagating Activation Differences, Avanti Shrikumar et al. 
2017](https://arxiv.org/abs/1704.02685) and [Towards better understanding of gradient-based attribution methods for deep neural networks, Marco Ancona et al. 2018](https://openreview.net/pdf?id=Sy21R9JAW) +* `NeuronIntegratedGradients`: [Computationally Efficient Measures of Internal Neuron Importance, Avanti Shrikumar et al. 2018](https://arxiv.org/abs/1807.09946) * `GradientShap`, `NeuronGradientShap`, `LayerGradientShap`, `DeepLiftShap`, `NeuronDeepLiftShap`, `LayerDeepLiftShap`: [A Unified Approach to Interpreting Model Predictions, Scott M. Lundberg et al. 2017](http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions) -* `InternalInfluence`: [Influence-Directed Explanations for Deep Convolutional Networks, Klas Leino et al. 2018](https://arxiv.org/pdf/1802.03788.pdf) +* `InternalInfluence`: [Influence-Directed Explanations for Deep Convolutional Networks, Klas Leino et al. 2018](https://arxiv.org/abs/1802.03788) * `Saliency`, `NeuronGradient`: [Deep Inside Convolutional Networks: Visualising -Image Classification Models and Saliency Maps, K. Simonyan, et. al. 2014](https://arxiv.org/pdf/1312.6034.pdf) -* `GradCAM`, `Guided GradCAM`: [Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization, Ramprasaath R. Selvaraju et al. 2017](https://arxiv.org/abs/1610.02391.pdf) -* `Deconvolution`, `Neuron Deconvolution`: [Visualizing and Understanding Convolutional Networks, Matthew D Zeiler et al. 2014](https://arxiv.org/pdf/1311.2901.pdf) -* `Guided Backpropagation`, `Neuron Guided Backpropagation`: [Striving for Simplicity: The All Convolutional Net, Jost Tobias Springenberg et al. 2015](https://arxiv.org/pdf/1412.6806.pdf) +Image Classification Models and Saliency Maps, K. Simonyan, et. al. 2014](https://arxiv.org/abs/1312.6034) +* `GradCAM`, `Guided GradCAM`: [Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization, Ramprasaath R. Selvaraju et al. 
2017](https://arxiv.org/abs/1610.02391) +* `Deconvolution`, `Neuron Deconvolution`: [Visualizing and Understanding Convolutional Networks, Matthew D Zeiler et al. 2014](https://arxiv.org/abs/1311.2901) +* `Guided Backpropagation`, `Neuron Guided Backpropagation`: [Striving for Simplicity: The All Convolutional Net, Jost Tobias Springenberg et al. 2015](https://arxiv.org/abs/1412.6806) * `Feature Permutation`: [Permutation Feature Importance](https://christophm.github.io/interpretable-ml-book/feature-importance.html) * `Occlusion`: [Visualizing and Understanding Convolutional Networks](https://arxiv.org/abs/1311.2901) * `Shapley Value`: [A value for n-person games. Contributions to the Theory of Games 2.28 (1953): 307-317](https://apps.dtic.mil/dtic/tr/fulltext/u2/604084.pdf) * `Shapley Value Sampling`: [Polynomial calculation of the Shapley value based on sampling](https://www.sciencedirect.com/science/article/pii/S0305054808000804) * `Infidelity and Sensitivity`: [On the (In)fidelity and Sensitivity for Explanations](https://arxiv.org/abs/1901.09392) -More details about the above mentioned [algorithms](https://captum.ai/docs/algorithms) and their pros and cons can be found on our [web-site](https://captum.ai/docs/algorithms_comparison_matrix). +More details about the above mentioned [attribution algorithms](https://captum.ai/docs/attribution_algorithms) and their pros and cons can be found on our [web-site](https://captum.ai/docs/algorithms_comparison_matrix). ## License Captum is BSD licensed, as found in the [LICENSE](LICENSE) file. diff --git a/captum/_utils/av.py b/captum/_utils/av.py index ac3c32a204..1b749162f8 100644 --- a/captum/_utils/av.py +++ b/captum/_utils/av.py @@ -80,7 +80,7 @@ def __getitem__(self, idx: int) -> Union[Tensor, Tuple[Tensor, ...]]: av = torch.load(fl) return av - def __len__(self): + def __len__(self) -> int: return len(self.files) AV_DIR_NAME: str = "av" @@ -211,9 +211,9 @@ def save( AV.generate_dataset_activations from batch index. 
It assumes identifier is same for all layers if a list of `layers` is provided. - layers (str or List of str): The layer(s) for which the activation vectors + layers (str or list[str]): The layer(s) for which the activation vectors are computed. - act_tensors (Tensor or List of Tensor): A batch of activation vectors. + act_tensors (Tensor or list of Tensor): A batch of activation vectors. This must match the dimension of `layers`. num_id (str): string representing the batch number for which the activation vectors are computed @@ -299,13 +299,15 @@ def _manage_loading_layers( for the `layer` are stored. model_id (str): The name/version of the model for which layer activations are being computed and stored. - layers (str or List of str): The layer(s) for which the activation vectors + layers (str or list[str]): The layer(s) for which the activation vectors are computed. + load_from_disk (bool, optional): Whether or not to load from disk. + Default: True identifier (str or None): An optional identifier for the layer activations. Can be used to distinguish between activations for different training batches. - num_id (str): An optional string representing the batch number for which the - activation vectors are computed + num_id (str, optional): An optional string representing the batch number + for which the activation vectors are computed. Returns: List of layer names for which activations should be generated @@ -357,9 +359,9 @@ def _compute_and_save_activations( define all of its layers as attributes of the model. model_id (str): The name/version of the model for which layer activations are being computed and stored. - layers (str or List of str): The layer(s) for which the activation vectors + layers (str or list[str]): The layer(s) for which the activation vectors are computed. - inputs (tensor or tuple of tensors): Batch of examples for + inputs (Tensor or tuple of Tensor): Batch of examples for which influential instances are computed. 
They are passed to the input `model`. The first dimension in `inputs` tensor or tuple of tensors corresponds to the batch size. @@ -368,7 +370,7 @@ def _compute_and_save_activations( different training batches. num_id (str): An required string representing the batch number for which the activation vectors are computed - additional_forward_args (optional): Additional arguments that will be + additional_forward_args (Any, optional): Additional arguments that will be passed to `model` after inputs. Default: None load_from_disk (bool): Forces function to regenerate activations if False. @@ -433,7 +435,7 @@ def generate_dataset_activations( define all of its layers as attributes of the model. model_id (str): The name/version of the model for which layer activations are being computed and stored. - layers (str or List of str): The layer(s) for which the activation vectors + layers (str or list[str]): The layer(s) for which the activation vectors are computed. dataloader (torch.utils.data.DataLoader): DataLoader that yields Dataset for which influential instances are computed. They are passed to diff --git a/captum/_utils/gradient.py b/captum/_utils/gradient.py index a15157d8d7..5b853cd435 100644 --- a/captum/_utils/gradient.py +++ b/captum/_utils/gradient.py @@ -730,7 +730,7 @@ def _compute_jacobian_wrt_params( but must behave as a library loss function would if `reduction='none'`. Returns: - grads (Tuple of Tensor): Returns the Jacobian for the minibatch as a + grads (tuple of Tensor): Returns the Jacobian for the minibatch as a tuple of gradients corresponding to the tuple of trainable parameters returned by `model.parameters()`. Each object grads[i] references to the gradients for the parameters in the i-th trainable layer of the model. @@ -804,7 +804,7 @@ def _compute_jacobian_wrt_params_with_sample_wise_trick( Defaults to 'sum'. 
Returns: - grads (Tuple of Tensor): Returns the Jacobian for the minibatch as a + grads (tuple of Tensor): Returns the Jacobian for the minibatch as a tuple of gradients corresponding to the tuple of trainable parameters returned by `model.parameters()`. Each object grads[i] references to the gradients for the parameters in the i-th trainable layer of the model. diff --git a/captum/_utils/models/linear_model/model.py b/captum/_utils/models/linear_model/model.py index bfffdbf38a..24302d540c 100644 --- a/captum/_utils/models/linear_model/model.py +++ b/captum/_utils/models/linear_model/model.py @@ -20,7 +20,7 @@ def __init__(self, train_fn: Callable, **kwargs) -> None: Please note that this is an experimental feature. Args: - train_fn (callable) + train_fn (Callable) The function to train with. See `captum._utils.models.linear_model.train.sgd_train_linear_model` and @@ -65,14 +65,14 @@ def _construct_model_params( normalization parameters used. bias (bool): Whether to add a bias term. Not needed if normalized input. - weight_values (tensor, optional): + weight_values (Tensor, optional): The values to initialize the linear model with. This must be a 1D or 2D tensor, and of the form `(num_outputs, num_features)` or `(num_features,)`. Additionally, if this is provided you need not to provide `in_features` or `out_features`. - bias_value (tensor, optional): + bias_value (Tensor, optional): The bias value to initialize the model with. - classes (tensor, optional): + classes (Tensor, optional): The list of prediction classes supported by the model in case it performs classificaton. In case of regression it is set to None. Default: None diff --git a/captum/attr/_core/deep_lift.py b/captum/attr/_core/deep_lift.py index 251e68dc23..ea059d7fcc 100644 --- a/captum/attr/_core/deep_lift.py +++ b/captum/attr/_core/deep_lift.py @@ -112,7 +112,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. 
Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -185,7 +185,7 @@ def attribute( # type: ignore r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -194,7 +194,7 @@ def attribute( # type: ignore to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -226,7 +226,7 @@ def attribute( # type: ignore use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -251,7 +251,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -267,7 +267,7 @@ def attribute( # type: ignore is set to True convergence delta will be returned in a tuple following attributions. 
Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. This function can take at least one and at most three arguments with the following signature: @@ -288,7 +288,7 @@ def attribute( # type: ignore Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on DeepLift rescale rule with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value @@ -296,14 +296,14 @@ def attribute( # type: ignore If a single tensor is provided as inputs, a single tensor is returned. If a tuple is provided for inputs, a tuple of corresponding sized tensors is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must equal the total sum of the attributions computed based on DeepLift's rescale rule. Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in input. + examples in input. Note that the logic described for deltas is guaranteed when the default logic for attribution computations is used, meaning that the `custom_attribution_func=None`, otherwise it is not guaranteed and @@ -611,12 +611,14 @@ class DeepLiftShap(DeepLift): each baseline and averages resulting attributions. More details about the algorithm can be found here: - http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf + https://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf Note that the explanation model: + 1. 
Assumes that input features are independent of one another 2. Is linear, meaning that the explanations are modeled through the additive composition of feature effects. + Although, it assumes a linear model for each explanation, the overall model across multiple explanations can be complex and non-linear. """ @@ -625,7 +627,7 @@ def __init__(self, model: Module, multiply_by_inputs: bool = True) -> None: r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API. multiply_by_inputs (bool, optional): Indicates whether to factor @@ -694,7 +696,7 @@ def attribute( # type: ignore r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -703,7 +705,7 @@ def attribute( # type: ignore to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (tensor, tuple of tensors, callable): + baselines (Tensor, tuple of Tensor, or Callable): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -728,7 +730,7 @@ def attribute( # type: ignore It is recommended that the number of samples in the baselines' tensors is larger than one. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). 
If the network returns a scalar value per example, @@ -753,7 +755,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -769,7 +771,7 @@ def attribute( # type: ignore is set to True convergence delta will be returned in a tuple following attributions. Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. This function can take at least one and at most three arguments with the following signature: @@ -789,7 +791,7 @@ def attribute( # type: ignore Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on DeepLift rescale rule with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value @@ -797,7 +799,7 @@ def attribute( # type: ignore If a single tensor is provided as inputs, a single tensor is returned. If a tuple is provided for inputs, a tuple of corresponding sized tensors is returned. 
- - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must be very close to the total sum of attributions diff --git a/captum/attr/_core/feature_ablation.py b/captum/attr/_core/feature_ablation.py index fd0007fc75..70de13e81c 100644 --- a/captum/attr/_core/feature_ablation.py +++ b/captum/attr/_core/feature_ablation.py @@ -47,8 +47,8 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or - any modification of it + forward_func (Callable): The forward function of the model or + any modification of it. """ PerturbationAttribution.__init__(self, forward_func) self.use_weights = False @@ -68,7 +68,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which ablation + inputs (Tensor or tuple of Tensor): Input for which ablation attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -77,7 +77,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when ablated. Baselines can be provided as: @@ -101,10 +101,11 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. 
Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -129,7 +130,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -144,7 +145,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which should be ablated together. feature_mask should contain the same number of tensors as inputs. @@ -193,8 +194,8 @@ def attribute( Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. If the forward function returns a scalar value per example, attributions will be @@ -414,10 +415,10 @@ def _ith_input_ablation_generator( **kwargs, ): """ - This method return an generator of ablation perturbations of the i-th input + This method returns a generator of ablation perturbations of the i-th input Returns: - ablation_iter (generator): yields each perturbation to be evaluated + ablation_iter (Generator): yields each perturbation to be evaluated as a tuple (inputs, additional_forward_args, targets, mask).
""" extra_args = {} diff --git a/captum/attr/_core/feature_permutation.py b/captum/attr/_core/feature_permutation.py index 544ff16ac6..9aac4c11a1 100644 --- a/captum/attr/_core/feature_permutation.py +++ b/captum/attr/_core/feature_permutation.py @@ -75,9 +75,9 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or - any modification of it - perm_func (callable, optional): A function that accepts a batch of + forward_func (Callable): The forward function of the model or + any modification of it. + perm_func (Callable, optional): A function that accepts a batch of inputs and a feature mask, and "permutes" the feature using feature mask across the batch. This defaults to a function which applies a random permutation, this argument only needs @@ -101,14 +101,16 @@ def attribute( # type: ignore **kwargs: Any, ) -> TensorOrTupleOfTensorsGeneric: r""" - This function is almost equivalent to `FeatureAblation.attribute`. The - main difference is the way ablated examples are generated. Specifically - they are generated through the `perm_func`, as we set the baselines for - `FeatureAblation.attribute` to None. + This function is almost equivalent to + :func:`FeatureAblation.attribute <captum.attr.FeatureAblation.attribute>`. The + main difference is the way ablated examples are generated. Specifically they + are generated through the ``perm_func``, as we set the baselines for + :func:`FeatureAblation.attribute <captum.attr.FeatureAblation.attribute>` to + ``None``. Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which permutation attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If @@ -118,7 +120,7 @@ def attribute( # type: ignore 0 corresponds to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately.
- target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which difference is computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -143,7 +145,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -158,7 +160,7 @@ def attribute( # type: ignore Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which should be ablated together. feature_mask should contain the same number of tensors as inputs. @@ -196,14 +198,14 @@ def attribute( # type: ignore a simple output of progress. Default: False **kwargs (Any, optional): Any additional arguments used by child - classes of FeatureAblation (such as Occlusion) to construct - ablations. These arguments are ignored when using - FeatureAblation directly. + classes of :class:`.FeatureAblation` (such as + :class:`.Occlusion`) to construct ablations. These + arguments are ignored when using FeatureAblation directly. Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. 
If the forward function returns a scalar value per example, attributions will be diff --git a/captum/attr/_core/gradient_shap.py b/captum/attr/_core/gradient_shap.py index 57d5e909af..f6ec8da302 100644 --- a/captum/attr/_core/gradient_shap.py +++ b/captum/attr/_core/gradient_shap.py @@ -50,7 +50,7 @@ class GradientShap(GradientAttribution): In some sense it can be viewed as an approximation of integrated gradients by computing the expectations of gradients for different baselines. - Current implementation uses Smoothgrad from `NoiseTunnel` in order to + Current implementation uses Smoothgrad from :class:`.NoiseTunnel` in order to randomly draw samples from the distribution of baselines, add noise to input samples and compute the expectation (smoothgrad). """ @@ -59,7 +59,7 @@ def __init__(self, forward_func: Callable, multiply_by_inputs: bool = True) -> N r""" Args: - forward_func (function): The forward function of the model or + forward_func (Callable): The forward function of the model or any modification of it. multiply_by_inputs (bool, optional): Indicates whether to factor model inputs' multiplier in the final attribution scores. @@ -127,7 +127,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which SHAP attribution + inputs (Tensor or tuple of Tensor): Input for which SHAP attribution values are computed. If `forward_func` takes a single tensor as input, a single input tensor should be provided. If `forward_func` takes multiple tensors as input, a tuple @@ -135,7 +135,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (tensor, tuple of tensors, callable): + baselines (Tensor, tuple of Tensor, or Callable): Baselines define the starting point from which expectation is computed and can be provided as: @@ -158,11 +158,11 @@ def attribute( It is recommended that the number of samples in the baselines' tensors is larger than one. - n_samples (int, optional): The number of randomly generated examples + n_samples (int, optional): The number of randomly generated examples per sample in the input batch. Random examples are generated by adding gaussian random noise to each sample. Default: `5` if `n_samples` is not provided. - stdevs (float, or a tuple of floats optional): The standard deviation + stdevs (float or tuple of float, optional): The standard deviation of gaussian noise with zero mean that is added to each input in the batch. If `stdevs` is a single float value then that same value is used for all inputs. If it is @@ -171,7 +171,7 @@ def attribute( corresponds to the input with the same index in the inputs tuple. Default: 0.0 - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -196,7 +196,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. 
It can contain a tuple of ND tensors or @@ -215,7 +215,7 @@ def attribute( Default: False Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on GradientSHAP with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value @@ -223,7 +223,7 @@ def attribute( If a single tensor is provided as inputs, a single tensor is returned. If a tuple is provided for inputs, a tuple of corresponding sized tensors is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must be very close to the total sum of the attributions @@ -294,8 +294,8 @@ def __init__(self, forward_func: Callable, multiply_by_inputs=True) -> None: r""" Args: - forward_func (function): The forward function of the model or - any modification of it + forward_func (Callable): The forward function of the model or + any modification of it. multiply_by_inputs (bool, optional): Indicates whether to factor model inputs' multiplier in the final attribution scores. In the literature this is also known as local vs global diff --git a/captum/attr/_core/guided_backprop_deconvnet.py b/captum/attr/_core/guided_backprop_deconvnet.py index e1953ed5b9..ba2c2114c5 100644 --- a/captum/attr/_core/guided_backprop_deconvnet.py +++ b/captum/attr/_core/guided_backprop_deconvnet.py @@ -27,7 +27,7 @@ def __init__(self, model: Module, use_relu_grad_output: bool = False) -> None: r""" Args: - model (nn.Module): The reference to PyTorch model instance. + model (nn.Module): The reference to PyTorch model instance. 
""" GradientAttribution.__init__(self, model) self.model = model @@ -121,7 +121,7 @@ def __init__(self, model: Module) -> None: r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place ReLU submodules; these are not supported by the register_full_backward_hook PyTorch API. """ @@ -139,7 +139,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -148,7 +148,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -173,7 +173,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -186,8 +186,8 @@ def attribute( Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The guided backprop gradients with respect to each input feature. 
Attributions will always be the same size as the provided inputs, with each value @@ -234,7 +234,7 @@ def __init__(self, model: Module) -> None: r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place ReLU submodules; these are not supported by the register_full_backward_hook PyTorch API. """ @@ -250,7 +250,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -259,7 +259,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -284,7 +284,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -297,8 +297,8 @@ def attribute( Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The deconvolution attributions with respect to each input feature. 
Attributions will always be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/guided_grad_cam.py b/captum/attr/_core/guided_grad_cam.py index f6e29c4b29..3c7478bae8 100644 --- a/captum/attr/_core/guided_grad_cam.py +++ b/captum/attr/_core/guided_grad_cam.py @@ -38,7 +38,7 @@ class GuidedGradCam(GradientAttribution): More details regarding GuidedGradCAM can be found in the original GradCAM paper here: - https://arxiv.org/pdf/1610.02391.pdf + https://arxiv.org/abs/1610.02391 Warning: Ensure that all ReLU operations in the forward function of the given model are performed using a module (nn.module.ReLU). @@ -51,14 +51,14 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place ReLU submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. layer (torch.nn.Module): Layer for which GradCAM attributions are computed. Currently, only layers with a single tensor output are supported. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -80,7 +80,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which attributions + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -88,7 +88,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -113,7 +113,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -151,8 +151,8 @@ def attribute( Default: False Returns: - *tensor* of **attributions**: - - **attributions** (*tensor*): + *Tensor* of **attributions**: + - **attributions** (*Tensor*): Element-wise product of (upsampled) GradCAM and Guided Backprop attributions. If a single tensor is provided as inputs, a single tensor is diff --git a/captum/attr/_core/input_x_gradient.py b/captum/attr/_core/input_x_gradient.py index 7817466013..fcf1d85025 100644 --- a/captum/attr/_core/input_x_gradient.py +++ b/captum/attr/_core/input_x_gradient.py @@ -22,7 +22,7 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it """ GradientAttribution.__init__(self, forward_func) @@ -37,7 +37,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -46,7 +46,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. 
- target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -71,7 +71,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -84,8 +84,8 @@ def attribute( Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The input x gradient with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/integrated_gradients.py b/captum/attr/_core/integrated_gradients.py index e96a826c32..04896fac61 100644 --- a/captum/attr/_core/integrated_gradients.py +++ b/captum/attr/_core/integrated_gradients.py @@ -53,7 +53,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it multiply_by_inputs (bool, optional): Indicates whether to factor model inputs' multiplier in the final attribution scores. @@ -130,7 +130,7 @@ def attribute( # type: ignore Args: - inputs (tensor or tuple of tensors): Input for which integrated + inputs (Tensor or tuple of Tensor): Input for which integrated gradients are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. 
If forward_func takes multiple tensors as input, a tuple @@ -138,7 +138,7 @@ def attribute( # type: ignore that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the starting point from which integral is computed and can be provided as: @@ -162,11 +162,12 @@ def attribute( # type: ignore - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -191,7 +192,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -210,7 +211,7 @@ def attribute( # type: ignore Default: None n_steps (int, optional): The number of steps used by the approximation method. Default: 50. - method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. 
@@ -232,7 +233,7 @@ def attribute( # type: ignore Default: False Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Integrated gradients with respect to each input feature. attributions will always be the same size as the provided inputs, with each value providing the attribution of the @@ -240,7 +241,7 @@ def attribute( # type: ignore If a single tensor is provided as inputs, a single tensor is returned. If a tuple is provided for inputs, a tuple of corresponding sized tensors is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): The difference between the total approximated and true integrated gradients. This is computed using the property that the total sum of forward_func(inputs) - @@ -248,7 +249,7 @@ def attribute( # type: ignore integrated gradient. Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in inputs. + examples in inputs. Examples:: diff --git a/captum/attr/_core/kernel_shap.py b/captum/attr/_core/kernel_shap.py index 2826b30dfe..12da6991dc 100644 --- a/captum/attr/_core/kernel_shap.py +++ b/captum/attr/_core/kernel_shap.py @@ -29,8 +29,8 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or - any modification of it + forward_func (Callable): The forward function of the model or + any modification of it. """ Lime.__init__( self, @@ -86,7 +86,7 @@ def attribute( # type: ignore Args: - inputs (tensor or tuple of tensors): Input for which KernelShap + inputs (Tensor or tuple of Tensor): Input for which KernelShap is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. 
If forward_func takes multiple tensors as input, a tuple @@ -94,7 +94,7 @@ def attribute( # type: ignore that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the reference value which replaces each feature when the corresponding interpretable feature is set to 0. @@ -120,10 +120,11 @@ def attribute( # type: ignore - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which surrogate model is trained (for classification cases, this is usually the target class). @@ -149,7 +150,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -166,7 +167,7 @@ def attribute( # type: ignore Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which correspond to the same interpretable feature. 
feature_mask @@ -184,7 +185,7 @@ def attribute( # type: ignore If None, then a feature mask is constructed which assigns each scalar within a tensor as a separate feature. Default: None - n_samples (int, optional): The number of samples of the original + n_samples (int, optional): The number of samples of the original model used to train the surrogate interpretable model. Default: `50` if `n_samples` is not provided. perturbations_per_eval (int, optional): Allows multiple samples @@ -219,8 +220,8 @@ def attribute( # type: ignore Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. If return_input_shape = True, attributions will be the same size as the provided inputs, with each value @@ -316,7 +317,9 @@ def kernel_shap_perturb_generator( Perturbations are sampled by the following process: - Choose k (number of selected features), based on the distribution p(k) = (M - 1) / (k * (M - k)) + where M is the total number of features in the interpretable space + - Randomly select a binary vector with k ones, each sample is equally likely. This is done by generating a random vector of normal values and thresholding based on the top k elements. 
diff --git a/captum/attr/_core/layer/grad_cam.py b/captum/attr/_core/layer/grad_cam.py index c650409149..bcbcb02af7 100644 --- a/captum/attr/_core/layer/grad_cam.py +++ b/captum/attr/_core/layer/grad_cam.py @@ -47,7 +47,7 @@ class LayerGradCam(LayerAttribution, GradientAttribution): More details regarding the GradCAM method can be found in the original paper here: - https://arxiv.org/pdf/1610.02391.pdf + https://arxiv.org/abs/1610.02391 """ def __init__( @@ -59,13 +59,13 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's output dimensions, except for dimension 2, which will be 1, since GradCAM sums over channels. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -86,7 +86,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which attributions + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -94,7 +94,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). 
If the network returns a scalar value per example, @@ -119,7 +119,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -151,8 +151,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Attributions based on GradCAM method. Attributions will be the same size as the output of the given layer, except for dimension 2, diff --git a/captum/attr/_core/layer/internal_influence.py b/captum/attr/_core/layer/internal_influence.py index 8976fe7344..46aba1ff61 100644 --- a/captum/attr/_core/layer/internal_influence.py +++ b/captum/attr/_core/layer/internal_influence.py @@ -30,7 +30,7 @@ class InternalInfluence(LayerAttribution, GradientAttribution): given input. If no baseline is provided, the default baseline is the zero tensor. More details on this approach can be found here: - https://arxiv.org/pdf/1802.03788.pdf + https://arxiv.org/abs/1802.03788 Note that this method is similar to applying integrated gradients and taking the layer as input, integrating the gradient of the layer with @@ -46,7 +46,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -54,7 +54,7 @@ def __init__( the inputs or outputs of the layer, corresponding to attribution of each neuron in the input or output of this layer. 
- device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -78,7 +78,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which internal + inputs (Tensor or tuple of Tensor): Input for which internal influence is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -86,7 +86,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define a starting point from which integral is computed and can be provided as: @@ -115,7 +115,7 @@ def attribute( use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -140,7 +140,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -159,7 +159,7 @@ def attribute( Default: None n_steps (int, optional): The number of steps used by the approximation method. 
Default: 50. - method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. @@ -187,13 +187,13 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Internal influence of each neuron in given layer output. Attributions will always be the same size as the output or input of the given layer depending on whether `attribute_to_layer_input` is set to `False` or - `True`respectively. + `True` respectively. Attributions are returned in a tuple if the layer inputs / outputs contain multiple tensors, otherwise a single tensor is returned. diff --git a/captum/attr/_core/layer/layer_activation.py b/captum/attr/_core/layer/layer_activation.py index 86c511706b..c4244e5966 100644 --- a/captum/attr/_core/layer/layer_activation.py +++ b/captum/attr/_core/layer/layer_activation.py @@ -25,9 +25,9 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it - layer (torch.nn.Module or list(torch.nn.Module)): Layer or layers + layer (torch.nn.Module or list of torch.nn.Module): Layer or layers for which attributions are computed. Output size of attribute matches this layer's input or output dimensions, depending on whether we attribute to @@ -36,7 +36,7 @@ def __init__( this layer. If multiple layers are provided, attributions are returned as a list, each element corresponding to the activations of the corresponding layer. 
- device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -54,7 +54,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer activation is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -62,7 +62,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -87,8 +87,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* or *list* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors* or *list*): + *Tensor* or tuple of *Tensor* or list of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor* or *list*): Activation of each neuron in given layer output. Attributions will always be the same size as the output of the given layer. 
diff --git a/captum/attr/_core/layer/layer_conductance.py b/captum/attr/_core/layer/layer_conductance.py index 3d76569c10..b8d9bc563f 100644 --- a/captum/attr/_core/layer/layer_conductance.py +++ b/captum/attr/_core/layer/layer_conductance.py @@ -32,7 +32,7 @@ class LayerConductance(LayerAttribution, GradientAttribution): The details of the approach can be found here: https://arxiv.org/abs/1805.12233 - https://arxiv.org/pdf/1807.09946.pdf + https://arxiv.org/abs/1807.09946 Note that this provides the total conductance of each neuron in the layer's output. To obtain the breakdown of a neuron's conductance by input @@ -49,7 +49,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -57,7 +57,7 @@ def __init__( the inputs or outputs of the layer, corresponding to attribution of each neuron in the input or output of this layer. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -120,7 +120,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer conductance is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -128,7 +128,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the starting point from which integral is computed and can be provided as: @@ -152,11 +152,12 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -181,7 +182,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -200,7 +201,7 @@ def attribute( Default: None n_steps (int, optional): The number of steps used by the approximation method. Default: 50. - method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. @@ -234,7 +235,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Conductance of each neuron in given layer input or output. 
Attributions will always be the same size as the input or output of the given layer, depending on @@ -244,7 +245,7 @@ def attribute( Attributions are returned in a tuple if the layer inputs / outputs contain multiple tensors, otherwise a single tensor is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): The difference between the total approximated and true conductance. This is computed using the property that the total sum of @@ -252,7 +253,7 @@ def attribute( the total sum of the attributions. Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in inputs. + examples in inputs. Examples:: diff --git a/captum/attr/_core/layer/layer_deep_lift.py b/captum/attr/_core/layer/layer_deep_lift.py index 71a8e9eb29..362f250170 100644 --- a/captum/attr/_core/layer/layer_deep_lift.py +++ b/captum/attr/_core/layer/layer_deep_lift.py @@ -69,7 +69,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -144,7 +144,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, @@ -153,7 +153,7 @@ def attribute( corresponds to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -180,11 +180,12 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -209,7 +210,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -236,7 +237,7 @@ def attribute( attribute to the input or output, is a single tensor. Support for multiple tensors will be added later. Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. 
This function can take at least one and at most three arguments with the following signature: @@ -255,7 +256,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on DeepLift's rescale rule with respect to layer's inputs or outputs. Attributions will always be the same size as the provided layer's inputs or outputs, depending on @@ -264,14 +265,14 @@ def attribute( just a tensor is returned; if the layer input / output has multiple tensors, then a corresponding tuple of tensors is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must equal the total sum of the attributions computed based on DeepLift's rescale rule. Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in input. + examples in input. Note that the logic described for deltas is guaranteed when the default logic for attribution computations is used, meaning that the `custom_attribution_func=None`, otherwise @@ -381,12 +382,14 @@ class LayerDeepLiftShap(LayerDeepLift, DeepLiftShap): input flag `attribute_to_layer_input`. More details about the algorithm can be found here: - http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf + https://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf Note that the explanation model: + 1. Assumes that input features are independent of one another 2. Is linear, meaning that the explanations are modeled through the additive composition of feature effects. 
+ Although, it assumes a linear model for each explanation, the overall model across multiple explanations can be complex and non-linear. """ @@ -400,7 +403,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -479,7 +482,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -488,7 +491,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (tensor, tuple of tensors, callable): + baselines (Tensor, tuple of Tensor, or Callable): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -513,7 +516,7 @@ def attribute( It is recommended that the number of samples in the baselines' tensors is larger than one. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -538,7 +541,7 @@ def attribute( target for the corresponding example. 
Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -564,7 +567,7 @@ def attribute( outputs of internal layers are single tensors. Support for multiple tensors will be added later. Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. This function can take at least one and at most three arguments with the following signature: @@ -584,7 +587,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on DeepLift's rescale rule with respect to layer's inputs or outputs. Attributions will always be the same size as the provided layer's inputs @@ -595,7 +598,7 @@ def attribute( from a forward hook. For standard modules, inputs of a single tensor are usually wrapped in a tuple, while outputs of a single tensor are not. 
- - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must be very close to the total sum of attributions diff --git a/captum/attr/_core/layer/layer_feature_ablation.py b/captum/attr/_core/layer/layer_feature_ablation.py index 75ac885eac..ee7df14ff7 100644 --- a/captum/attr/_core/layer/layer_feature_ablation.py +++ b/captum/attr/_core/layer/layer_feature_ablation.py @@ -42,7 +42,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -50,7 +50,7 @@ def __init__( the inputs or outputs of the layer, corresponding to attribution of each neuron in the input or output of this layer. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself @@ -75,7 +75,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -83,7 +83,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- layer_baselines (scalar, tensor, tuple of scalars or tensors, optional): + layer_baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Layer baselines define reference values which replace each layer input / output value when ablated. Layer baselines should be a single tensor with dimensions @@ -94,7 +94,7 @@ def attribute( In the cases when `baselines` is not provided, we internally use zero as the baseline for each neuron. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -119,7 +119,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -131,7 +131,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - layer_mask (tensor or tuple of tensors, optional): + layer_mask (Tensor or tuple of Tensor, optional): layer_mask defines a mask for the layer, grouping elements of the layer input / output which should be ablated together. @@ -171,8 +171,8 @@ def attribute( Default: 1 Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution of each neuron in given layer input or output. 
Attributions will always be the same size as the input or output of the given layer, depending on diff --git a/captum/attr/_core/layer/layer_gradient_shap.py b/captum/attr/_core/layer/layer_gradient_shap.py index 9473475cdf..b6dfda9106 100644 --- a/captum/attr/_core/layer/layer_gradient_shap.py +++ b/captum/attr/_core/layer/layer_gradient_shap.py @@ -29,7 +29,7 @@ class LayerGradientShap(LayerAttribution, GradientAttribution): #deep-learning-example-with-gradientexplainer-tensorflowkeraspytorch-models A Unified Approach to Interpreting Model Predictions - http://papers.nips.cc/paper\ + https://papers.nips.cc/paper\ 7062-a-unified-approach-to-interpreting-model-predictions GradientShap approximates SHAP values by computing the expectations of @@ -52,7 +52,7 @@ class LayerGradientShap(LayerAttribution, GradientAttribution): In some sense it can be viewed as an approximation of integrated gradients by computing the expectations of gradients for different baselines. - Current implementation uses Smoothgrad from `NoiseTunnel` in order to + Current implementation uses Smoothgrad from :class:`.NoiseTunnel` in order to randomly draw samples from the distribution of baselines, add noise to input samples and compute the expectation (smoothgrad). """ @@ -67,7 +67,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -75,7 +75,7 @@ def __init__( the inputs or outputs of the layer, corresponding to attribution of each neuron in the input or output of this layer. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. 
If forward_func is given as the DataParallel model itself, @@ -146,7 +146,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input which are used to compute + inputs (Tensor or tuple of Tensor): Input which are used to compute SHAP attribution values for a given `layer`. If `forward_func` takes a single tensor as input, a single input tensor should be provided. @@ -155,7 +155,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (tensor, tuple of tensors, callable): + baselines (Tensor, tuple of Tensor, or Callable): Baselines define the starting point from which expectation is computed and can be provided as: @@ -178,11 +178,11 @@ def attribute( It is recommended that the number of samples in the baselines' tensors is larger than one. - n_samples (int, optional): The number of randomly generated examples + n_samples (int, optional): The number of randomly generated examples per sample in the input batch. Random examples are generated by adding gaussian random noise to each sample. Default: `5` if `n_samples` is not provided. - stdevs (float, or a tuple of floats optional): The standard deviation + stdevs (float or tuple of float, optional): The standard deviation of gaussian noise with zero mean that is added to each input in the batch. If `stdevs` is a single float value then that same value is used for all inputs. If it is @@ -191,7 +191,7 @@ def attribute( corresponds to the input with the same index in the inputs tuple. Default: 0.0 - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -216,7 +216,7 @@ def attribute( target for the corresponding example. 
Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It can contain a tuple of ND tensors or @@ -246,7 +246,7 @@ def attribute( Default: False Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on GradientSHAP with respect to layer's input or output. Attributions will always be the same size as the provided layer's inputs or outputs, @@ -255,7 +255,7 @@ def attribute( Attributions are returned in a tuple if the layer inputs / outputs contain multiple tensors, otherwise a single tensor is returned. - - **delta** (*tensor*, returned if return_convergence_delta=True): + - **delta** (*Tensor*, returned if return_convergence_delta=True): This is computed using the property that the total sum of forward_func(inputs) - forward_func(baselines) must be very close to the total sum of the attributions @@ -335,7 +335,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -343,7 +343,7 @@ def __init__( the inputs or outputs of the layer, corresponding to attribution of each neuron in the input or output of this layer. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. 
If forward_func is given as the DataParallel model itself, diff --git a/captum/attr/_core/layer/layer_gradient_x_activation.py b/captum/attr/_core/layer/layer_gradient_x_activation.py index a63a5d7abe..385a1491c4 100644 --- a/captum/attr/_core/layer/layer_gradient_x_activation.py +++ b/captum/attr/_core/layer/layer_gradient_x_activation.py @@ -30,9 +30,9 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it - layer (torch.nn.Module or list(torch.nn.Module)): Layer or layers + layer (torch.nn.Module or list of torch.nn.Module): Layer or layers for which attributions are computed. Output size of attribute matches this layer's input or output dimensions, depending on whether we attribute to @@ -41,7 +41,7 @@ def __init__( this layer. If multiple layers are provided, attributions are returned as a list, each element corresponding to the attributions of the corresponding layer. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -80,7 +80,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which attributions + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -88,7 +88,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -113,7 +113,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -134,8 +134,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* or *list* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors* or *list*): + *Tensor* or tuple of *Tensor* or list of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor* or *list*): Product of gradient and activation for each neuron in given layer output. Attributions will always be the same size as the diff --git a/captum/attr/_core/layer/layer_integrated_gradients.py b/captum/attr/_core/layer/layer_integrated_gradients.py index 2e769a5658..d67f52cad5 100644 --- a/captum/attr/_core/layer/layer_integrated_gradients.py +++ b/captum/attr/_core/layer/layer_integrated_gradients.py @@ -41,7 +41,6 @@ class LayerIntegratedGradients(LayerAttribution, GradientAttribution): More details regarding the integrated gradients method can be found in the original paper: https://arxiv.org/abs/1703.01365 - """ def __init__( @@ -53,12 +52,12 @@ def __init__( ) -> None: r""" Args: - forward_func (callable): The forward function of the model or any + + forward_func (Callable): The forward function of the model or any modification of it - layer (ModuleOrModuleList): - Layer or list of layers for which attributions are computed. 
- For each layer the output size of the attribute matches - this layer's input or output dimensions, depending on + layer (ModuleOrModuleList): Layer or list of layers for which attributions + are computed. For each layer the output size of the attribute + matches this layer's input or output dimensions, depending on whether we attribute to the inputs or outputs of the layer, corresponding to the attribution of each neuron in the input or output of this layer. @@ -74,7 +73,7 @@ def __init__( dependence, e.g. if you pass in l2 you cannot pass in l1 or l3. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -101,7 +100,7 @@ def __init__( if isinstance(layer, list) and len(layer) > 1: warnings.warn( "Multiple layers provided. Please ensure that each layer is" - "**not** solely solely dependent on the outputs of" + "**not** solely dependent on the outputs of" "another layer. Please refer to the documentation for more" "detail." ) @@ -192,7 +191,7 @@ def attribute( Args: - inputs (tensor or tuple of tensors): Input for which layer integrated + inputs (Tensor or tuple of Tensor): Input for which layer integrated gradients are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -200,7 +199,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the starting point from which integral is computed and can be provided as: @@ -214,6 +213,7 @@ def attribute( - a tuple of tensors or scalars, the baseline corresponding to each tensor in the inputs' tuple can be: + - either a tensor with matching dimensions to corresponding tensor in the inputs' tuple or the first dimension is one and the remaining @@ -227,7 +227,7 @@ def attribute( use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -252,7 +252,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -261,17 +261,19 @@ def attribute( tensors or any arbitrary python types. These arguments are provided to forward_func in order following the arguments in inputs. + For a tensor, the first dimension of the tensor must correspond to the number of examples. It will be repeated for each of `n_steps` along the integrated path. For all other types, the given argument is used for all forward evaluations. + Note that attributions are not computed with respect to these arguments. Default: None n_steps (int, optional): The number of steps used by the approximation method. Default: 50. 
- method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. @@ -280,6 +282,7 @@ def attribute( which are computed (forward / backward passes) sequentially. internal_batch_size must be at least equal to #examples. + For DataParallel models, each batch is split among the available devices, so evaluations on each available device contain internal_batch_size / num_devices examples. @@ -297,16 +300,19 @@ def attribute( then the attributions will be computed with respect to layer input, otherwise it will be computed with respect to layer output. + Note that currently it is assumed that either the input or the output of internal layer, depending on whether we attribute to the input or output, is a single tensor. Support for multiple tensors will be added later. Default: False + Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor*, tuple of *tensors* or tuple of *tensors*): - Integrated gradients with respect to `layer`'s inputs or - outputs. Attributions will always be the same size and + + - **attributions** (*Tensor*, tuple of *Tensor* or tuple of + *Tensor*): Integrated gradients with respect to `layer`'s inputs + or outputs. Attributions will always be the same size and dimensionality as the input or output of the given layer, depending on whether we attribute to the inputs or outputs of the layer which is decided by the input flag @@ -323,7 +329,8 @@ def attribute( multiple tensors: the corresponding output element will be a tuple of tensors. The ordering of the outputs will be the same order as the layers given in the constructor. 
- - **delta** (*tensor*, returned if return_convergence_delta=True): + + - **delta** (*Tensor*, returned if return_convergence_delta=True): The difference between the total approximated and true integrated gradients. This is computed using the property that the total sum of forward_func(inputs) - @@ -331,7 +338,7 @@ def attribute( integrated gradient. Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in inputs. + examples in inputs. Examples:: diff --git a/captum/attr/_core/layer/layer_lrp.py b/captum/attr/_core/layer/layer_lrp.py index bdc328f47e..1f78d1fdd8 100644 --- a/captum/attr/_core/layer/layer_lrp.py +++ b/captum/attr/_core/layer/layer_lrp.py @@ -42,7 +42,7 @@ def __init__(self, model: Module, layer: ModuleOrModuleList) -> None: """ Args: - model (module): The forward function of the model or + model (Module): The forward function of the model or any modification of it. Custom rules for a given layer need to be defined as attribute `module.rule` and need to be of type PropagationRule. @@ -50,8 +50,7 @@ def __init__(self, model: Module, layer: ModuleOrModuleList) -> None: these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. - - layer (torch.nn.Module or list(torch.nn.Module)): Layer or layers + layer (torch.nn.Module or list of torch.nn.Module): Layer or layers for which attributions are computed. The size and dimensionality of the attributions corresponds to the size and dimensionality of the layer's @@ -110,9 +109,9 @@ def attribute( ], ]: r""" - Args: - inputs (tensor or tuple of tensors): Input for which relevance is + + inputs (Tensor or tuple of Tensor): Input for which relevance is propagated. If forward_func takes a single tensor as input, a single input tensor should be provided. 
@@ -121,12 +120,12 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for - which gradients are computed (for classification cases, - this is usually the target class). - If the network returns a scalar value per example, - no target index is necessary. - For general 2D outputs, targets can be either: + target (int, tuple, Tensor, or list, optional): Output indices for + which gradients are computed (for classification cases, + this is usually the target class). + If the network returns a scalar value per example, + no target index is necessary. + For general 2D outputs, targets can be either: - a single integer or a tensor containing a single integer, which is applied to all input examples @@ -176,9 +175,10 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions** or 2-element tuple of - **attributions**, **delta** or lists of **attributions** and **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions** or 2-element tuple of + **attributions**, **delta** or list of **attributions** and **delta**: + + - **attributions** (*Tensor* or tuple of *Tensor*): The propagated relevance values with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value @@ -190,14 +190,15 @@ def attribute( implementations. If attributions for all layers are returned (layer=None) a list of tensors or tuples of tensors is returned with entries for each layer. 
- - **delta** (*tensor* or list of *tensors* - returned if return_convergence_delta=True): + - **delta** (*Tensor* or list of *Tensor* + returned if return_convergence_delta=True): Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of - of examples in input. + examples in input. If attributions for all layers are returned (layer=None) a list of tensors is returned with entries for each layer. + Examples:: >>> # ImageClassifier takes a single input tensor of images Nx3x32x32, diff --git a/captum/attr/_core/lime.py b/captum/attr/_core/lime.py index f5ad7877bc..1f94bb9cb2 100644 --- a/captum/attr/_core/lime.py +++ b/captum/attr/_core/lime.py @@ -82,7 +82,7 @@ def __init__( Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it. If a batch is provided as input for attribution, it is expected that forward_func returns a scalar representing the entire batch. @@ -106,7 +106,7 @@ def __init__( Note that calling fit multiple times should retrain the interpretable model, each attribution call reuses the same given interpretable model object. - similarity_func (callable): Function which takes a single sample + similarity_func (Callable): Function which takes a single sample along with its corresponding interpretable representation and returns the weight of the interpretable sample for training interpretable model. Weight is generally @@ -131,7 +131,7 @@ def __init__( All kwargs passed to the attribute method are provided as keyword arguments (kwargs) to this callable. - perturb_func (callable): Function which returns a single + perturb_func (Callable): Function which returns a single sampled input, generally a perturbation of the original input, which is used to train the interpretable surrogate model. Function can return samples in either @@ -171,7 +171,7 @@ def __init__( input. 
Once sampled, inputs can be converted to / from the interpretable representation with either to_interp_rep_transform or from_interp_rep_transform. - from_interp_rep_transform (callable): Function which takes a + from_interp_rep_transform (Callable): Function which takes a single sampled interpretable representation (tensor of shape 1 x num_interp_features) and returns the corresponding representation in the input space @@ -194,7 +194,7 @@ def __init__( All kwargs passed to the attribute method are provided as keyword arguments (kwargs) to this callable. - to_interp_rep_transform (callable): Function which takes a + to_interp_rep_transform (Callable): Function which takes a sample in the original input space and converts to its interpretable representation (tensor of shape 1 x num_interp_features). @@ -266,7 +266,7 @@ def attribute( Args: - inputs (tensor or tuple of tensors): Input for which LIME + inputs (Tensor or tuple of Tensor): Input for which LIME is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -274,7 +274,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which surrogate model is trained (for classification cases, this is usually the target class). @@ -300,7 +300,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. 
It must be either a single additional @@ -315,7 +315,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - n_samples (int, optional): The number of samples of the original + n_samples (int, optional): The number of samples of the original model used to train the surrogate interpretable model. Default: `50` if `n_samples` is not provided. perturbations_per_eval (int, optional): Allows multiple samples @@ -569,7 +569,7 @@ def default_from_interp_rep_transform(curr_sample, original_inputs, **kwargs): ), "Must provide feature_mask to use default interpretable representation transform" assert ( "baselines" in kwargs - ), "Must provide baselines to use default interpretable representation transfrom" + ), "Must provide baselines to use default interpretable representation transform" feature_mask = kwargs["feature_mask"] if isinstance(feature_mask, Tensor): binary_mask = curr_sample[0][feature_mask].bool() @@ -603,7 +603,7 @@ def get_exp_kernel_similarity_function( Args: - distance_mode (str, optional): Distance mode can be either "cosine" or + distance_mode (str, optional): Distance mode can be either "cosine" or "euclidean" corresponding to either cosine distance or Euclidean distance respectively. Distance is computed by flattening the original inputs and perturbed inputs @@ -732,7 +732,7 @@ def __init__( Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it interpretable_model (Model, optional): Model object to train interpretable model. @@ -760,14 +760,14 @@ def __init__( Note that calling fit multiple times should retrain the interpretable model, each attribution call reuses the same given interpretable model object. 
- similarity_func (callable, optional): Function which takes a single sample + similarity_func (Callable, optional): Function which takes a single sample along with its corresponding interpretable representation and returns the weight of the interpretable sample for training the interpretable model. This is often referred to as a similarity kernel. This argument is optional and defaults to a function which - applies an exponential kernel to the consine distance between + applies an exponential kernel to the cosine distance between the original input and perturbed input, with a kernel width of 1.0. @@ -793,7 +793,7 @@ def __init__( kwargs includes baselines, feature_mask, num_interp_features (integer, determined from feature mask). - perturb_func (callable, optional): Function which returns a single + perturb_func (Callable, optional): Function which returns a single sampled input, which is a binary vector of length num_interp_features, or a generator of such tensors. @@ -879,7 +879,7 @@ def attribute( # type: ignore Args: - inputs (tensor or tuple of tensors): Input for which LIME + inputs (Tensor or tuple of Tensor): Input for which LIME is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -887,7 +887,7 @@ def attribute( # type: ignore that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when the corresponding interpretable feature is set to 0. @@ -913,10 +913,11 @@ def attribute( # type: ignore - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. 
+ In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which surrogate model is trained (for classification cases, this is usually the target class). @@ -942,7 +943,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -959,7 +960,7 @@ def attribute( # type: ignore Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which correspond to the same interpretable feature. feature_mask @@ -977,7 +978,7 @@ def attribute( # type: ignore If None, then a feature mask is constructed which assigns each scalar within a tensor as a separate feature. Default: None - n_samples (int, optional): The number of samples of the original + n_samples (int, optional): The number of samples of the original model used to train the surrogate interpretable model. Default: `50` if `n_samples` is not provided. perturbations_per_eval (int, optional): Allows multiple samples @@ -1012,8 +1013,8 @@ def attribute( # type: ignore Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. 
If return_input_shape = True, attributions will be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/lrp.py b/captum/attr/_core/lrp.py index e11d0b8544..d557f0ce20 100644 --- a/captum/attr/_core/lrp.py +++ b/captum/attr/_core/lrp.py @@ -45,7 +45,7 @@ def __init__(self, model: Module) -> None: r""" Args: - model (module): The forward function of the model or any modification of + model (Module): The forward function of the model or any modification of it. Custom rules for a given layer need to be defined as attribute `module.rule` and need to be of type PropagationRule. If no rule is specified for a layer, a pre-defined default rule for the module type @@ -98,7 +98,8 @@ def attribute( ]: r""" Args: - inputs (tensor or tuple of tensors): Input for which relevance is + + inputs (Tensor or tuple of Tensor): Input for which relevance is propagated. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -106,12 +107,13 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for - which gradients are computed (for classification cases, - this is usually the target class). - If the network returns a scalar value per example, - no target index is necessary. - For general 2D outputs, targets can be either: + + target (int, tuple, Tensor, or list, optional): Output indices for + which gradients are computed (for classification cases, + this is usually the target class). + If the network returns a scalar value per example, + no target index is necessary. 
+ For general 2D outputs, targets can be either: - a single integer or a tensor containing a single integer, which is applied to all input examples @@ -153,9 +155,10 @@ def attribute( of rules is printed during propagation. Returns: - *tensor* or tuple of *tensors* of **attributions** - or 2-element tuple of **attributions**, **delta**:: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions** + or 2-element tuple of **attributions**, **delta**: + + - **attributions** (*Tensor* or tuple of *Tensor*): The propagated relevance values with respect to each input feature. The values are normalized by the output score value (sum(relevance)=1). To obtain values comparable to other @@ -168,10 +171,12 @@ def attribute( corresponding sized tensors is returned. The sum of attributions is one and not corresponding to the prediction score as in other implementations. - - **delta** (*tensor*, returned if return_convergence_delta=True): + + - **delta** (*Tensor*, returned if return_convergence_delta=True): Delta is calculated per example, meaning that the number of elements in returned delta tensor is equal to the number of of examples in the inputs. + Examples:: >>> # ImageClassifier takes a single input tensor of images Nx3x32x32, @@ -241,7 +246,7 @@ def compute_convergence_delta( Args: - attributions (tensor or tuple of tensors): Attribution scores that + attributions (Tensor or tuple of Tensor): Attribution scores that are precomputed by an attribution algorithm. Attributions can be provided in form of a single tensor or a tuple of those. It is assumed that attribution @@ -249,12 +254,13 @@ def compute_convergence_delta( examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - output (tensor with single element): The output value with respect to which + output (Tensor): The output value with respect to which the attribution values are computed. 
This value corresponds to - the target score of a classification model. + the target score of a classification model. The given tensor + should only have a single element. Returns: - *tensor*: + *Tensor*: - **delta** Difference of relevance in output layer and input layer. """ if isinstance(attributions, tuple): diff --git a/captum/attr/_core/neuron/neuron_conductance.py b/captum/attr/_core/neuron/neuron_conductance.py index dec6b39b01..004d941cb9 100644 --- a/captum/attr/_core/neuron/neuron_conductance.py +++ b/captum/attr/_core/neuron/neuron_conductance.py @@ -45,7 +45,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which neuron attributions are computed. Attributions for a particular neuron in the input or output @@ -62,7 +62,7 @@ def __init__( Currently, it is assumed that the inputs or the outputs of the layer, depending on which one is used for attribution, can only be a single tensor. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -103,7 +103,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which neuron + inputs (Tensor or tuple of Tensor): Input for which neuron conductance is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -111,7 +111,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. 
- neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -143,7 +143,7 @@ def attribute( the gradient of output with respect to the intermedite neuron, which cannot be computed for aggregations of multiple intemediate neurons. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the starting point from which integral is computed and can be provided as: @@ -172,7 +172,7 @@ def attribute( use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -197,7 +197,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -216,7 +216,7 @@ def attribute( Default: None n_steps (int, optional): The number of steps used by the approximation method. Default: 50. - method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. 
@@ -244,8 +244,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Conductance for particular neuron with respect to each input feature. Attributions will always be the same size as the provided diff --git a/captum/attr/_core/neuron/neuron_deep_lift.py b/captum/attr/_core/neuron/neuron_deep_lift.py index aff216d37a..d486bdea51 100644 --- a/captum/attr/_core/neuron/neuron_deep_lift.py +++ b/captum/attr/_core/neuron/neuron_deep_lift.py @@ -46,7 +46,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -90,7 +90,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, @@ -99,7 +99,7 @@ def attribute( corresponds to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -120,7 +120,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. 
+ neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -133,7 +133,7 @@ def attribute( or a 1D tensor with length equal to batch_size (one scalar per input example) - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -165,7 +165,7 @@ def attribute( use zero scalar corresponding to each input tensor. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -187,7 +187,7 @@ def attribute( attribute to the input or output, is a single tensor. Support for multiple tensors will be added later. Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. This function can take at least one and at most three arguments with the following signature: @@ -207,7 +207,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Computes attributions using Deeplift's rescale rule for particular neuron with respect to each input feature. Attributions will always be the same size as the provided @@ -273,12 +273,13 @@ class NeuronDeepLiftShap(NeuronAttribution, GradientAttribution): by the input flag `attribute_to_layer_input`. 
More details about the algorithm can be found here: - http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf + https://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf Note that the explanation model: 1. Assumes that input features are independent of one another 2. Is linear, meaning that the explanations are modeled through the additive composition of feature effects. + Although, it assumes a linear model for each explanation, the overall model across multiple explanations can be complex and non-linear. """ @@ -289,7 +290,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place nonlinear submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -334,7 +335,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which layer + inputs (Tensor or tuple of Tensor): Input for which layer attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, @@ -343,7 +344,7 @@ def attribute( corresponds to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -364,7 +365,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. 
- a callable, which should take the target layer as input (single tensor or tuple @@ -376,7 +377,8 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - baselines (tensor, tuple of tensors, callable): + + baselines (Tensor, tuple of Tensor, or Callable): Baselines define reference samples that are compared with the inputs. In order to assign attribution scores DeepLift computes the differences between the inputs/outputs and @@ -401,7 +403,7 @@ def attribute( It is recommended that the number of samples in the baselines' tensors is larger than one. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -423,7 +425,7 @@ def attribute( attribute to the input or output, is a single tensor. Support for multiple tensors will be added later. Default: False - custom_attribution_func (callable, optional): A custom function for + custom_attribution_func (Callable, optional): A custom function for computing final attribution scores. This function can take at least one and at most three arguments with the following signature: @@ -443,7 +445,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Computes attributions using Deeplift's rescale rule for particular neuron with respect to each input feature. 
Attributions will always be the same size as the provided diff --git a/captum/attr/_core/neuron/neuron_feature_ablation.py b/captum/attr/_core/neuron/neuron_feature_ablation.py index d706f71cb4..8ee73197da 100644 --- a/captum/attr/_core/neuron/neuron_feature_ablation.py +++ b/captum/attr/_core/neuron/neuron_feature_ablation.py @@ -35,7 +35,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Attributions for a particular neuron in the input or output @@ -44,7 +44,7 @@ def __init__( Currently, it is assumed that the inputs or the outputs of the layer, depending on which one is used for attribution, can only be a single tensor. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -67,7 +67,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which neuron + inputs (Tensor or tuple of Tensor): Input for which neuron attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -75,7 +75,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. 
Neuron selector can be provided as: @@ -96,7 +96,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -108,7 +108,8 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - baselines (scalar, tensor, tuple of scalars or tensors, optional): + + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when ablated. Baselines can be provided as: @@ -132,10 +133,11 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -147,7 +149,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which should be ablated together. feature_mask should contain the same number of tensors as inputs. 
@@ -187,8 +189,8 @@ def attribute( Default: 1 Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Attributions of particular neuron with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value providing the attribution diff --git a/captum/attr/_core/neuron/neuron_gradient.py b/captum/attr/_core/neuron/neuron_gradient.py index 5292990bbf..d948dfee1a 100644 --- a/captum/attr/_core/neuron/neuron_gradient.py +++ b/captum/attr/_core/neuron/neuron_gradient.py @@ -33,7 +33,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -44,7 +44,7 @@ def __init__( Currently, it is assumed that the inputs or the outputs of the layer, depending on which one is used for attribution, can only be a single tensor. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -64,7 +64,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which neuron + inputs (Tensor or tuple of Tensor): Input for which neuron gradients are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. 
If forward_func takes multiple tensors as input, a tuple @@ -72,7 +72,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -93,7 +93,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -105,7 +105,7 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -130,8 +130,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Gradients of particular neuron with respect to each input feature. 
Attributions will always be the same size as the provided inputs, with each value providing the attribution diff --git a/captum/attr/_core/neuron/neuron_gradient_shap.py b/captum/attr/_core/neuron/neuron_gradient_shap.py index 42a543b50d..338949352e 100644 --- a/captum/attr/_core/neuron/neuron_gradient_shap.py +++ b/captum/attr/_core/neuron/neuron_gradient_shap.py @@ -18,7 +18,7 @@ class NeuronGradientShap(NeuronAttribution, GradientAttribution): #deep-learning-example-with-gradientexplainer-tensorflowkeraspytorch-models A Unified Approach to Interpreting Model Predictions - http://papers.nips.cc/paper\ + https://papers.nips.cc/paper\ 7062-a-unified-approach-to-interpreting-model-predictions GradientShap approximates SHAP values by computing the expectations of @@ -41,7 +41,7 @@ class NeuronGradientShap(NeuronAttribution, GradientAttribution): In some sense it can be viewed as an approximation of integrated gradients by computing the expectations of gradients for different baselines. - Current implementation uses Smoothgrad from `NoiseTunnel` in order to + Current implementation uses Smoothgrad from :class:`.NoiseTunnel` in order to randomly draw samples from the distribution of baselines, add noise to input samples and compute the expectation (smoothgrad). """ @@ -56,17 +56,17 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which neuron attributions are computed. The output size of the attribute method matches the - dimensions of the inputs or ouputs of the neuron with + dimensions of the inputs or outputs of the neuron with index `neuron_selector` in this layer, depending on whether we attribute to the inputs or outputs of the neuron. Currently, it is assumed that the inputs or the outputs of the neurons in this layer, depending on which one is used for attribution, can only be a single tensor. 
- device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -106,7 +106,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which SHAP attribution + inputs (Tensor or tuple of Tensor): Input for which SHAP attribution values are computed. If `forward_func` takes a single tensor as input, a single input tensor should be provided. If `forward_func` takes multiple tensors as input, a tuple @@ -114,7 +114,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -135,7 +135,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. 
- a callable, which should take the target layer as input (single tensor or tuple @@ -147,7 +147,7 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - baselines (tensor, tuple of tensors, callable): + baselines (Tensor, tuple of Tensor, or Callable): Baselines define the starting point from which expectation is computed and can be provided as: @@ -170,11 +170,11 @@ def attribute( It is recommended that the number of samples in the baselines' tensors is larger than one. - n_samples (int, optional): The number of randomly generated examples + n_samples (int, optional): The number of randomly generated examples per sample in the input batch. Random examples are generated by adding gaussian random noise to each sample. Default: `5` if `n_samples` is not provided. - stdevs (float, or a tuple of floats optional): The standard deviation + stdevs (float or tuple of float, optional): The standard deviation of gaussian noise with zero mean that is added to each input in the batch. If `stdevs` is a single float value then that same value is used for all inputs. If it is @@ -183,7 +183,7 @@ def attribute( corresponds to the input with the same index in the inputs tuple. Default: 0.0 - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It can contain a tuple of ND tensors or @@ -209,7 +209,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution score computed based on GradientSHAP with respect to each input feature. 
Attributions will always be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/neuron/neuron_guided_backprop_deconvnet.py b/captum/attr/_core/neuron/neuron_guided_backprop_deconvnet.py index 7c69aed87a..b9a5e80b7f 100644 --- a/captum/attr/_core/neuron/neuron_guided_backprop_deconvnet.py +++ b/captum/attr/_core/neuron/neuron_guided_backprop_deconvnet.py @@ -35,7 +35,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place ReLU submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -48,7 +48,7 @@ def __init__( Currently, it is assumed that the inputs or the outputs of the layer, depending on which one is used for attribution, can only be a single tensor. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -69,7 +69,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -78,7 +78,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. 
Neuron selector can be provided as: @@ -99,7 +99,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -111,7 +111,7 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -134,8 +134,8 @@ def attribute( Support for multiple tensors will be added later. Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Deconvolution attribution of particular neuron with respect to each input feature. Attributions will always be the same size as the provided @@ -207,7 +207,7 @@ def __init__( r""" Args: - model (nn.Module): The reference to PyTorch model instance. Model cannot + model (nn.Module): The reference to PyTorch model instance. Model cannot contain any in-place ReLU submodules; these are not supported by the register_full_backward_hook PyTorch API starting from PyTorch v1.9. @@ -217,7 +217,7 @@ def __init__( in the attribute method. Currently, only layers with a single tensor output are supported. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. 
This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -238,7 +238,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -247,7 +247,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -268,7 +268,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -280,7 +280,7 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -303,8 +303,8 @@ def attribute( Support for multiple tensors will be added later. 
Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Guided backprop attribution of particular neuron with respect to each input feature. Attributions will always be the same size as the provided diff --git a/captum/attr/_core/neuron/neuron_integrated_gradients.py b/captum/attr/_core/neuron/neuron_integrated_gradients.py index f67aec7e7e..2afc17180f 100644 --- a/captum/attr/_core/neuron/neuron_integrated_gradients.py +++ b/captum/attr/_core/neuron/neuron_integrated_gradients.py @@ -33,7 +33,7 @@ def __init__( r""" Args: - forward_func (callable): The forward function of the model or any + forward_func (Callable): The forward function of the model or any modification of it layer (torch.nn.Module): Layer for which attributions are computed. Output size of attribute matches this layer's input or @@ -44,7 +44,7 @@ def __init__( Currently, it is assumed that the inputs or the outputs of the layer, depending on which one is used for attribution, can only be a single tensor. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model. This allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -84,7 +84,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which neuron integrated + inputs (Tensor or tuple of Tensor): Input for which neuron integrated gradients are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. 
If forward_func takes multiple tensors as input, a tuple @@ -92,7 +92,7 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - neuron_selector (int, callable, or tuple of ints or slices): + neuron_selector (int, Callable, tuple of int, or slice): Selector for neuron in given layer for which attribution is desired. Neuron selector can be provided as: @@ -113,7 +113,7 @@ def attribute( indexed output tensor is used for attribution. Note that specifying a slice of a tensor would amount to computing the attribution of the sum of the specified - neurons, and not the individual neurons independantly. + neurons, and not the individual neurons independently. - a callable, which should take the target layer as input (single tensor or tuple @@ -125,7 +125,7 @@ def attribute( this function returns either a tensor with one element or a 1D tensor with length equal to batch_size (one scalar per input example) - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define the starting point from which integral is computed. Baselines can be provided as: @@ -155,7 +155,7 @@ def attribute( use zero scalar corresponding to each input tensor. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -174,7 +174,7 @@ def attribute( Default: None n_steps (int, optional): The number of steps used by the approximation method. Default: 50. 
- method (string, optional): Method for approximating the integral, + method (str, optional): Method for approximating the integral, one of `riemann_right`, `riemann_left`, `riemann_middle`, `riemann_trapezoid` or `gausslegendre`. Default: `gausslegendre` if no method is provided. @@ -202,8 +202,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Integrated gradients for particular neuron with respect to each input feature. Attributions will always be the same size as the provided diff --git a/captum/attr/_core/noise_tunnel.py b/captum/attr/_core/noise_tunnel.py index 0fbc32115e..eda936a048 100644 --- a/captum/attr/_core/noise_tunnel.py +++ b/captum/attr/_core/noise_tunnel.py @@ -43,10 +43,12 @@ class NoiseTunnel(Attribution): returned. More details about adding noise can be found in the following papers: - https://arxiv.org/abs/1810.03292 - https://arxiv.org/abs/1810.03307 - https://arxiv.org/abs/1706.03825 - https://arxiv.org/pdf/1806.10758 + + * https://arxiv.org/abs/1810.03292 + * https://arxiv.org/abs/1810.03307 + * https://arxiv.org/abs/1706.03825 + * https://arxiv.org/abs/1806.10758 + This method currently also supports batches of multiple examples input, however it can be computationally expensive depending on the model, the dimensionality of the data and execution environment. @@ -93,7 +95,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which integrated + inputs (Tensor or tuple of Tensor): Input for which integrated gradients are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. 
If forward_func takes multiple tensors as input, a tuple @@ -101,21 +103,21 @@ def attribute( that for all given input tensors, dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - nt_type (string, optional): Smoothing type of the attributions. + nt_type (str, optional): Smoothing type of the attributions. `smoothgrad`, `smoothgrad_sq` or `vargrad` Default: `smoothgrad` if `type` is not provided. - nt_samples (int, optional): The number of randomly generated examples + nt_samples (int, optional): The number of randomly generated examples per sample in the input batch. Random examples are generated by adding gaussian random noise to each sample. Default: `5` if `nt_samples` is not provided. - nt_samples_batch_size (int, optional): The number of the `nt_samples` + nt_samples_batch_size (int, optional): The number of the `nt_samples` that will be processed together. With the help of this parameter we can avoid out of memory situation and reduce the number of randomly generated examples per sample in each batch. Default: None if `nt_samples_batch_size` is not provided. In this case all `nt_samples` will be processed together. - stdevs (float, or a tuple of floats optional): The standard deviation + stdevs (float or tuple of float, optional): The standard deviation of gaussian noise with zero mean that is added to each input in the batch. If `stdevs` is a single float value then that same value is used for all inputs. If it is @@ -137,7 +139,7 @@ def attribute( Returns: **attributions** or 2-element tuple of **attributions**, **delta**: - - **attributions** (*tensor* or tuple of *tensors*): + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution with respect to each input feature. attributions will always be the same size as the provided inputs, with each value @@ -166,7 +168,7 @@ def attribute( >>> nt = NoiseTunnel(ig) >>> # Generates 10 perturbed input tensors per image. 
>>> # Computes integrated gradients for class 3 for each generated - >>> # input and averages attributions accros all 10 + >>> # input and averages attributions across all 10 >>> # perturbed inputs per image >>> attribution = nt.attribute(input, nt_type='smoothgrad', >>> nt_samples=10, target=3) diff --git a/captum/attr/_core/occlusion.py b/captum/attr/_core/occlusion.py index de148693fa..fedc2dae05 100644 --- a/captum/attr/_core/occlusion.py +++ b/captum/attr/_core/occlusion.py @@ -39,8 +39,8 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or - any modification of it + forward_func (Callable): The forward function of the model or + any modification of it. """ FeatureAblation.__init__(self, forward_func) self.use_weights = True @@ -62,7 +62,7 @@ def attribute( # type: ignore r""" Args: - inputs (tensor or tuple of tensors): Input for which occlusion + inputs (Tensor or tuple of Tensor): Input for which occlusion attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -71,7 +71,7 @@ def attribute( # type: ignore to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - sliding_window_shapes (tuple or tuple of tuples): Shape of patch + sliding_window_shapes (tuple or tuple of tuple): Shape of patch (hyperrectangle) to occlude each input. For a single input tensor, this must be a tuple of length equal to the number of dimensions of the input tensor - 1, defining @@ -80,7 +80,7 @@ def attribute( # type: ignore this must be a tuple containing one tuple for each input tensor defining the dimensions of the patch for that input tensor, as described for the single tensor case. 
- strides (int or tuple or tuple of ints or tuple of tuples, optional): + strides (int or tuple or tuple of int or tuple of tuple, optional): This defines the step by which the occlusion hyperrectangle should be shifted by in each direction for each iteration. For a single tensor input, this can be either a single @@ -100,7 +100,7 @@ def attribute( # type: ignore If None is provided, a stride of 1 is used for each dimension of each input tensor. Default: None - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when occluded. Baselines can be provided as: @@ -124,10 +124,11 @@ def attribute( # type: ignore - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which difference is computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -152,7 +153,7 @@ def attribute( # type: ignore target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. 
It must be either a single additional @@ -186,8 +187,8 @@ def attribute( # type: ignore Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/saliency.py b/captum/attr/_core/saliency.py index 3790bd2068..505c35b28e 100644 --- a/captum/attr/_core/saliency.py +++ b/captum/attr/_core/saliency.py @@ -20,15 +20,15 @@ class Saliency(GradientAttribution): the default, the absolute value of the gradients is returned. More details about the approach can be found in the following paper: - https://arxiv.org/pdf/1312.6034.pdf + https://arxiv.org/abs/1312.6034 """ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or - any modification of it + forward_func (Callable): The forward function of the model or + any modification of it. """ GradientAttribution.__init__(self, forward_func) @@ -43,7 +43,7 @@ def attribute( r""" Args: - inputs (tensor or tuple of tensors): Input for which saliency + inputs (Tensor or tuple of Tensor): Input for which saliency is computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -52,7 +52,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). 
If the network returns a scalar value per example, @@ -81,7 +81,7 @@ def attribute( to True, otherwise returns the (signed) gradients if False. Default: True - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -95,8 +95,8 @@ def attribute( Default: None Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The gradients with respect to each input feature. Attributions will always be the same size as the provided inputs, with each value diff --git a/captum/attr/_core/shapley_value.py b/captum/attr/_core/shapley_value.py index 72af4e7237..4d5f244816 100644 --- a/captum/attr/_core/shapley_value.py +++ b/captum/attr/_core/shapley_value.py @@ -66,7 +66,7 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or + forward_func (Callable): The forward function of the model or any modification of it. The forward function can either return a scalar per example, or a single scalar for the full batch. If a single scalar is returned for the batch, @@ -96,7 +96,7 @@ def attribute( Args: - inputs (tensor or tuple of tensors): Input for which Shapley value + inputs (Tensor or tuple of Tensor): Input for which Shapley value sampling attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. @@ -106,7 +106,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when ablated. Baselines can be provided as: @@ -131,10 +131,11 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which difference is computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -159,7 +160,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -174,7 +175,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which should be added together. feature_mask should contain the same number of tensors as inputs. @@ -196,7 +197,7 @@ def attribute( If None, then a feature mask is constructed which assigns each scalar within a tensor as a separate feature Default: None - n_samples (int, optional): The number of feature permutations + n_samples (int, optional): The number of feature permutations tested. Default: `25` if `n_samples` is not provided. 
perturbations_per_eval (int, optional): Allows multiple ablations @@ -218,8 +219,8 @@ def attribute( Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. If the forward function returns a scalar value per example, attributions will be @@ -519,7 +520,7 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable): The forward function of the model or + forward_func (Callable): The forward function of the model or any modification of it. The forward function can either return a scalar per example, or a single scalar for the full batch. If a single scalar is returned for the batch, @@ -548,7 +549,7 @@ def attribute( Args: - inputs (tensor or tuple of tensors): Input for which Shapley value + inputs (Tensor or tuple of Tensor): Input for which Shapley value sampling attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. @@ -558,7 +559,7 @@ def attribute( to the number of examples (aka batch size), and if multiple input tensors are provided, the examples must be aligned appropriately. - baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference value which replaces each feature when ablated. Baselines can be provided as: @@ -583,10 +584,11 @@ def attribute( - or a scalar, corresponding to a tensor in the inputs' tuple. This scalar value is broadcasted for corresponding input tensor. + In the cases when `baselines` is not provided, we internally use zero scalar corresponding to each input tensor. 
Default: None - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which difference is computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -611,7 +613,7 @@ def attribute( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -626,7 +628,7 @@ def attribute( Note that attributions are not computed with respect to these arguments. Default: None - feature_mask (tensor or tuple of tensors, optional): + feature_mask (Tensor or tuple of Tensor, optional): feature_mask defines a mask for the input, grouping features which should be added together. feature_mask should contain the same number of tensors as inputs. @@ -666,8 +668,8 @@ def attribute( a simple output of progress. Default: False Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): The attributions with respect to each input feature. If the forward function returns a scalar value per example, attributions will be diff --git a/captum/attr/_models/base.py b/captum/attr/_models/base.py index d57646c0da..0b9e406d73 100644 --- a/captum/attr/_models/base.py +++ b/captum/attr/_models/base.py @@ -76,7 +76,7 @@ def indices_to_embeddings(self, *input, **kwargs): Args: - *input (Any, Optional): This can be a tensor(s) of input indices or any + *input (Any, optional): This can be a tensor(s) of input indices or any other variable necessary to comput the embeddings. 
A typical example of input indices are word or token indices. **kwargs (Any, optional): Similar to `input` this can be any sequence @@ -99,10 +99,10 @@ class TokenReferenceBase: `TokenReferenceBase` class. """ - def __init__(self, reference_token_idx=0) -> None: + def __init__(self, reference_token_idx: int = 0) -> None: self.reference_token_idx = reference_token_idx - def generate_reference(self, sequence_length, device): + def generate_reference(self, sequence_length, device: torch.device) -> torch.Tensor: r""" Generated reference tensor of given `sequence_length` using `reference_token_idx`. @@ -137,22 +137,25 @@ def _set_deep_layer_value(obj, layer_names, value): setattr(reduce(getattr, layer_names[:-1], obj), layer_names[-1], value) -def configure_interpretable_embedding_layer(model, embedding_layer_name="embedding"): +def configure_interpretable_embedding_layer( + model: Module, embedding_layer_name: str = "embedding" +) -> InterpretableEmbeddingBase: r""" - This method wraps model's embedding layer with an interpretable embedding + This method wraps a model's embedding layer with an interpretable embedding layer that allows us to access the embeddings through their indices. Args: - model (torch.nn.Model): An instance of PyTorch model that contains embeddings. + model (torch.nn.Module): An instance of PyTorch model that contains embeddings. embedding_layer_name (str, optional): The name of the embedding layer in the `model` that we would like to make interpretable. Returns: - interpretable_emb (tensor): An instance of `InterpretableEmbeddingBase` - embedding layer that wraps model's embedding layer that is being - accessed through `embedding_layer_name`. + interpretable_emb (InterpretableEmbeddingBase): An instance of + `InterpretableEmbeddingBase` embedding layer that wraps model's + embedding layer that is being accessed through + `embedding_layer_name`. 
Examples:: @@ -202,7 +205,9 @@ def configure_interpretable_embedding_layer(model, embedding_layer_name="embeddi return interpretable_emb -def remove_interpretable_embedding_layer(model, interpretable_emb): +def remove_interpretable_embedding_layer( + model: Module, interpretable_emb: InterpretableEmbeddingBase +) -> None: r""" Removes interpretable embedding layer and sets back original embedding layer in the model. @@ -210,8 +215,8 @@ def remove_interpretable_embedding_layer(model, interpretable_emb): Args: model (torch.nn.Module): An instance of PyTorch model that contains embeddings - interpretable_emb (tensor): An instance of `InterpretableEmbeddingBase` - that was originally created in + interpretable_emb (InterpretableEmbeddingBase): An instance of + `InterpretableEmbeddingBase` that was originally created in `configure_interpretable_embedding_layer` function and has to be removed after interpretation is finished. diff --git a/captum/attr/_utils/approximation_methods.py b/captum/attr/_utils/approximation_methods.py index 9d63e90c1a..755e701d6a 100644 --- a/captum/attr/_utils/approximation_methods.py +++ b/captum/attr/_utils/approximation_methods.py @@ -28,7 +28,7 @@ def approximation_parameters( r"""Retrieves parameters for the input approximation `method` Args: - method: The name of the approximation method. Currently only `riemann` + method (str): The name of the approximation method. Currently only `riemann` and gauss legendre are """ if method in SUPPORTED_RIEMANN_METHODS: @@ -45,17 +45,16 @@ def riemann_builders( Args: - n: The number of integration steps - method: `left`, `right`, `middle` and `trapezoid` riemann + method (Riemann): `left`, `right`, `middle` and `trapezoid` riemann Returns: 2-element tuple of **step_sizes**, **alphas**: - - **step_sizes** (*callable*): + - **step_sizes** (*Callable*): `step_sizes` takes the number of steps as an input argument and returns an array of steps sizes which sum is smaller than or equal to one. 
- - **alphas** (*callable*): + - **alphas** (*Callable*): `alphas` takes the number of steps as an input argument and returns the multipliers/coefficients for the inputs of integrand in the range of [0, 1] @@ -104,18 +103,14 @@ def gauss_legendre_builders() -> Tuple[ proposed by [Xue Feng and her intern Hauroun Habeeb] (https://research.fb.com/people/feng-xue/). - Args: - - n (int): The number of integration steps - Returns: 2-element tuple of **step_sizes**, **alphas**: - - **step_sizes** (*callable*): + - **step_sizes** (*Callable*): `step_sizes` takes the number of steps as an input argument and returns an array of steps sizes which sum is smaller than or equal to one. - - **alphas** (*callable*): + - **alphas** (*Callable*): `alphas` takes the number of steps as an input argument and returns the multipliers/coefficients for the inputs of integrand in the range of [0, 1] diff --git a/captum/attr/_utils/attribution.py b/captum/attr/_utils/attribution.py index f4b6e9d35c..fed579eb92 100644 --- a/captum/attr/_utils/attribution.py +++ b/captum/attr/_utils/attribution.py @@ -31,7 +31,7 @@ class Attribution: def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. """ @@ -47,17 +47,17 @@ def __init__(self, forward_func: Callable) -> None: Args: - inputs (tensor or tuple of tensors): Input for which attribution + inputs (Tensor or tuple of Tensor): Input for which attribution is computed. It can be provided as a single tensor or a tuple of multiple tensors. If multiple input tensors - are provided, the batch sizes must be aligned accross all + are provided, the batch sizes must be aligned across all tensors. 
Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution values for each input tensor. The `attributions` have the same shape and dimensionality as the inputs. @@ -97,21 +97,21 @@ def has_convergence_delta(self) -> bool: Args: - attributions (tensor or tuple of tensors): Attribution scores that + attributions (Tensor or tuple of Tensor): Attribution scores that are precomputed by an attribution algorithm. Attributions can be provided in form of a single tensor or a tuple of those. It is assumed that attribution tensor's dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - *args (optional): Additonal arguments that are used by the + *args (Any, optional): Additional arguments that are used by the sub-classes depending on the specific implementation of `compute_convergence_delta`. Returns: - *tensor* of **deltas**: - - **deltas** (*tensor*): + *Tensor* of **deltas**: + - **deltas** (*Tensor*): Depending on specific implementaion of sub-classes, convergence delta can be returned per sample in form of a tensor or it can be aggregated @@ -150,7 +150,7 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. """ @@ -184,26 +184,26 @@ def compute_convergence_delta( Args: - attributions (tensor or tuple of tensors): Precomputed attribution + attributions (Tensor or tuple of Tensor): Precomputed attribution scores. The user can compute those using any attribution - algorithm. It is assumed the the shape and the + algorithm.
It is assumed the shape and the dimensionality of attributions must match the shape and the dimensionality of `start_point` and `end_point`. It also assumes that the attribution tensor's dimension 0 corresponds to the number of examples, and if multiple input tensors are provided, the examples must be aligned appropriately. - start_point (tensor or tuple of tensors, optional): `start_point` + start_point (Tensor or tuple of Tensor, optional): `start_point` is passed as an input to model's forward function. It is the starting point of attributions' approximation. It is assumed that both `start_point` and `end_point` have the same shape and dimensionality. - end_point (tensor or tuple of tensors): `end_point` + end_point (Tensor or tuple of Tensor): `end_point` is passed as an input to model's forward function. It is the end point of attributions' approximation. It is assumed that both `start_point` and `end_point` have the same shape and dimensionality. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which gradients are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -228,7 +228,7 @@ def compute_convergence_delta( target for the corresponding example. Default: None - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -245,8 +245,8 @@ def compute_convergence_delta( Returns: - *tensor* of **deltas**: - - **deltas** (*tensor*): + *Tensor* of **deltas**: + - **deltas** (*Tensor*): This implementation returns convergence delta per sample. Deriving sub-classes may do any type of aggregation of those values, if necessary. 
@@ -306,7 +306,7 @@ def __init__(self, forward_func: Callable) -> None: r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. """ @@ -318,12 +318,13 @@ def multiplies_by_inputs(self): class InternalAttribution(Attribution, Generic[ModuleOrModuleList]): - layer: ModuleOrModuleList r""" Shared base class for LayerAttrubution and NeuronAttribution, attribution types that require a model and a particular layer. """ + layer: ModuleOrModuleList + def __init__( self, forward_func: Callable, @@ -333,12 +334,12 @@ def __init__( r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. layer (torch.nn.Module): Layer for which output attributions are computed. Output size of attribute matches that of layer output. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model, which allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -351,7 +352,7 @@ def __init__( class LayerAttribution(InternalAttribution): r""" - Layer attribution provides attribution values for the given layer, quanitfying + Layer attribution provides attribution values for the given layer, quantifying the importance of each neuron within the given layer's output. The output attribution of calling attribute on a LayerAttribution object always matches the size of the layer output. 
@@ -366,12 +367,12 @@ def __init__( r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. layer (torch.nn.Module): Layer for which output attributions are computed. Output size of attribute matches that of layer output. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model, which allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -392,13 +393,13 @@ def interpolate( Args: - layer_attribution (torch.Tensor): Tensor of given layer attributions. + layer_attribution (Tensor): Tensor of given layer attributions. interpolate_dims (int or tuple): Upsampled dimensions. The number of elements must be the number of dimensions of layer_attribution - 2, since the first dimension corresponds to number of examples and the second is assumed to correspond to the number of channels. - interpolate_mode (str): Method for interpolation, which + interpolate_mode (str): Method for interpolation, which must be a valid input interpolation mode for torch.nn.functional. These methods are "nearest", "area", "linear" (3D-only), "bilinear" @@ -407,8 +408,8 @@ def interpolate( attribution. Returns: - *tensor* of upsampled **attributions**: - - **attributions** (*tensor*): + *Tensor* of upsampled **attributions**: + - **attributions** (*Tensor*): Upsampled layer attributions with first 2 dimensions matching slayer_attribution and remaining dimensions given by interpolate_dims. 
@@ -418,7 +419,7 @@ def interpolate( class NeuronAttribution(InternalAttribution): r""" - Neuron attribution provides input attribution for a given neuron, quanitfying + Neuron attribution provides input attribution for a given neuron, quantifying the importance of each input feature in the activation of a particular neuron. Calling attribute on a NeuronAttribution object requires also providing the index of the neuron in the output of the given layer for which attributions @@ -436,12 +437,12 @@ def __init__( r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of model's forward function. layer (torch.nn.Module): Layer for which output attributions are computed. Output size of attribute matches that of layer output. - device_ids (list(int)): Device ID list, necessary only if forward_func + device_ids (list[int]): Device ID list, necessary only if forward_func applies a DataParallel model, which allows reconstruction of intermediate outputs from batched results across devices. If forward_func is given as the DataParallel model itself, @@ -469,8 +470,8 @@ def __init__( Returns: - *tensor* or tuple of *tensors* of **attributions**: - - **attributions** (*tensor* or tuple of *tensors*): + *Tensor* or tuple of *Tensor* of **attributions**: + - **attributions** (*Tensor* or tuple of *Tensor*): Attribution values for each input vector. The `attributions` have the dimensionality of inputs. diff --git a/captum/attr/_utils/class_summarizer.py b/captum/attr/_utils/class_summarizer.py index 2485711866..664088c299 100644 --- a/captum/attr/_utils/class_summarizer.py +++ b/captum/attr/_utils/class_summarizer.py @@ -36,11 +36,11 @@ def update( # type: ignore This accepts either a single tensor to summarise or a tuple of tensors. 
Args: - x (Tensor or Tuple[Tensor, ...]): + x (Tensor or tuple of Tensor): The input tensor to be summarised. The first dimension of this input must be associated to the batch size of the inputs. - labels (int, tuple, tensor or list, optional): + labels (int, tuple, Tensor, or list, optional): The associated labels for `x`. If Any, we assume `labels` represents the label for all inputs in `x`. diff --git a/captum/attr/_utils/summarizer.py b/captum/attr/_utils/summarizer.py index 874e5d263b..e4c5c860a0 100644 --- a/captum/attr/_utils/summarizer.py +++ b/captum/attr/_utils/summarizer.py @@ -173,10 +173,10 @@ class SummarizerSingleTensor: def __init__(self, stats: List[Stat], summary_stats_indices: List[int]) -> None: r""" Args: - stats (list of Stat): A list of all the Stat objects that + stats (list[Stat]): A list of all the Stat objects that need to be updated. This must be in the appropriate order for updates (see `_reorder_stats`) - summary_stats (list of int): A list of indicies, referencing `stats`, + summary_stats_indices (list[int]): A list of indices, referencing `stats`, which are the stats you want to show in the .summary property. This does not require any specific order. """ diff --git a/captum/attr/_utils/visualization.py b/captum/attr/_utils/visualization.py index 0cfada9b7b..c4da31b6d6 100644 --- a/captum/attr/_utils/visualization.py +++ b/captum/attr/_utils/visualization.py @@ -118,18 +118,18 @@ def visualize_image_attr( Args: - attr (numpy.array): Numpy array corresponding to attributions to be + attr (numpy.ndarray): Numpy array corresponding to attributions to be visualized. Shape must be in the form (H, W, C), with channels as last dimension. Shape must also match that of the original image if provided. - original_image (numpy.array, optional): Numpy array corresponding to + original_image (numpy.ndarray, optional): Numpy array corresponding to original image. Shape must be in the form (H, W, C), with channels as the last dimension.
Image can be provided either with float values in range 0-1 or int values between 0-255. This is a necessary argument for any visualization method which utilizes the original image. Default: None - method (string, optional): Chosen method for visualizing attribution. + method (str, optional): Chosen method for visualizing attribution. Supported options are: 1. `heat_map` - Display heat map of chosen attributions @@ -145,8 +145,9 @@ def visualize_image_attr( 5. `alpha_scaling` - Sets alpha channel of each pixel to be equal to normalized attribution value. + Default: `heat_map` - sign (string, optional): Chosen sign of attributions to visualize. Supported + sign (str, optional): Chosen sign of attributions to visualize. Supported options are: 1. `positive` - Displays only positive pixel attributions. @@ -160,6 +161,7 @@ def visualize_image_attr( values. This is not supported for `masked_image` or `alpha_scaling` modes, since signed information cannot be represented in these modes. + Default: `absolute_value` plt_fig_axis (tuple, optional): Tuple of matplotlib.pyplot.figure and axis on which to visualize. If None is provided, then a new figure @@ -172,7 +174,7 @@ def visualize_image_attr( and scale value are computed using absolute value of attributions. Default: 2 - cmap (string, optional): String corresponding to desired colormap for + cmap (str, optional): String corresponding to desired colormap for heatmap visualization. This defaults to "Reds" for negative sign, "Blues" for absolute value, "Greens" for positive sign, and a spectrum from red to green for all. Note that this @@ -182,18 +184,18 @@ def visualize_image_attr( `blended_heat_map` visualization mode, which overlays the heat map over the greyscaled original image. Default: 0.5 - show_colorbar (boolean, optional): Displays colorbar for heatmap below + show_colorbar (bool, optional): Displays colorbar for heatmap below the visualization. 
If given method does not use a heatmap, then a colormap axis is created and hidden. This is necessary for appropriate alignment when visualizing multiple plots, some with colorbars and some without. Default: False - title (string, optional): Title string for plot. If None, no title is + title (str, optional): Title string for plot. If None, no title is set. Default: None fig_size (tuple, optional): Size of figure created. Default: (6,6) - use_pyplot (boolean, optional): If true, uses pyplot to create and show + use_pyplot (bool, optional): If true, uses pyplot to create and show figure and displays the figure after creating. If False, uses Matplotlib object oriented API and simply returns a figure object without showing. @@ -347,29 +349,29 @@ def visualize_image_attr_multiple( Args: - attr (numpy.array): Numpy array corresponding to attributions to be + attr (numpy.ndarray): Numpy array corresponding to attributions to be visualized. Shape must be in the form (H, W, C), with channels as last dimension. Shape must also match that of the original image if provided. - original_image (numpy.array, optional): Numpy array corresponding to + original_image (numpy.ndarray, optional): Numpy array corresponding to original image. Shape must be in the form (H, W, C), with channels as the last dimension. Image can be provided either with values in range 0-1 or 0-255. This is a necessary argument for any visualization method which utilizes the original image. - methods (list of strings): List of strings of length k, defining method + methods (list[str]): List of strings of length k, defining method for each visualization. Each method must be a valid string argument for method to visualize_image_attr. - signs (list of strings): List of strings of length k, defining signs for + signs (list[str]): List of strings of length k, defining signs for each visualization. Each sign must be a valid string argument for sign to visualize_image_attr. 
- titles (list of strings, optional): List of strings of length k, providing + titles (list[str], optional): List of strings of length k, providing a title string for each plot. If None is provided, no titles are added to subplots. Default: None fig_size (tuple, optional): Size of figure created. Default: (8, 6) - use_pyplot (boolean, optional): If true, uses pyplot to create and show + use_pyplot (bool, optional): If true, uses pyplot to create and show figure and displays the figure after creating. If False, uses Matplotlib object oriented API and simply returns a figure object without showing. @@ -460,19 +462,19 @@ def visualize_timeseries_attr( Args: - attr (numpy.array): Numpy array corresponding to attributions to be + attr (numpy.ndarray): Numpy array corresponding to attributions to be visualized. Shape must be in the form (N, C) with channels as last dimension, unless `channels_last` is set to True. Shape must also match that of the timeseries data. - data (numpy.array): Numpy array corresponding to the original, + data (numpy.ndarray): Numpy array corresponding to the original, equidistant timeseries data. Shape must be in the form (N, C) with channels as last dimension, unless `channels_last` is set to true. - x_values (numpy.array, optional): Numpy array corresponding to the + x_values (numpy.ndarray, optional): Numpy array corresponding to the points on the x-axis. Shape must be in the form (N, ). If not provided, integers from 0 to N-1 are used. Default: None - method (string, optional): Chosen method for visualizing attributions + method (str, optional): Chosen method for visualizing attributions overlaid onto data. Supported options are: 1. `overlay_individual` - Plot each channel individually in @@ -487,8 +489,9 @@ def visualize_timeseries_attr( and color the graphs according to the attribution values. Works best with color maps that does not contain white or very bright colors. 
+ Default: `overlay_individual` - sign (string, optional): Chosen sign of attributions to visualize. + sign (str, optional): Chosen sign of attributions to visualize. Supported options are: 1. `positive` - Displays only positive pixel attributions. @@ -500,8 +503,9 @@ def visualize_timeseries_attr( 4. `all` - Displays both positive and negative attribution values. + Default: `absolute_value` - channel_labels (list of strings, optional): List of labels + channel_labels (list[str], optional): List of labels corresponding to each channel in data. Default: None channels_last (bool, optional): If True, data is expected to have @@ -519,7 +523,7 @@ def visualize_timeseries_attr( and scale value are computed using absolute value of attributions. Default: 2 - cmap (string, optional): String corresponding to desired colormap for + cmap (str, optional): String corresponding to desired colormap for heatmap visualization. This defaults to "Reds" for negative sign, "Blues" for absolute value, "Greens" for positive sign, and a spectrum from red to green for all. Note that this @@ -529,14 +533,14 @@ def visualize_timeseries_attr( `blended_heat_map` visualization mode, which overlays the heat map over the greyscaled original image. Default: 0.7 - show_colorbar (boolean): Displays colorbar for heat map below + show_colorbar (bool): Displays colorbar for heat map below the visualization. - title (string, optional): Title string for plot. If None, no title is + title (str, optional): Title string for plot. If None, no title is set. Default: None fig_size (tuple, optional): Size of figure created. Default: (6,6) - use_pyplot (boolean): If true, uses pyplot to create and show + use_pyplot (bool): If true, uses pyplot to create and show figure and displays the figure after creating. If False, uses Matplotlib object oriented API and simply returns a figure object without showing. 
diff --git a/captum/concept/_core/cav.py b/captum/concept/_core/cav.py index 39aa9fba85..6aedb24fff 100644 --- a/captum/concept/_core/cav.py +++ b/captum/concept/_core/cav.py @@ -14,7 +14,7 @@ class CAV: boundary of a classifier which distinguishes between activation vectors produced by different concepts. More details can be found in the paper: - https://arxiv.org/pdf/1711.11279.pdf + https://arxiv.org/abs/1711.11279 """ def __init__( @@ -65,7 +65,7 @@ def assemble_save_path( layer name. model_id (str): A unique model identifier associated with input `layer` and `concepts` - concepts (list(Concept)): A list of concepts that are concatenated + concepts (list[Concept]): A list of concepts that are concatenated together and used as a concept key using their ids. These concept ids are retrieved from TCAV s`Concept` objects. layer (str): The name of the layer for which the activations are @@ -146,7 +146,7 @@ def load(cavs_path: str, model_id: str, concepts: List[Concept], layer: str): model_id (str): A unique model identifier associated with the CAVs. There exist a folder named `model_id` under `cavs_path` path. The CAVs are loaded from this folder. - concepts (list[Concept]): A List of concepts for which + concepts (list[Concept]): A List of concepts for which we would like to load the cavs. layer (str): The layer name. Ex.: "inception4c". In case of nested layers we use dots to specify the depth / hierarchy. diff --git a/captum/concept/_core/concept.py b/captum/concept/_core/concept.py index a550ab8a9d..b0adbd7f39 100644 --- a/captum/concept/_core/concept.py +++ b/captum/concept/_core/concept.py @@ -25,7 +25,7 @@ def __init__( r""" Args: - id (int): The unique identifier of the concept. + id (int): The unique identifier of the concept. name (str): A unique name of the concept. 
data_iter (DataLoader): A pytorch DataLoader object that combines a dataset and a sampler, and provides an iterable over a given @@ -35,6 +35,7 @@ def __init__( https://pytorch.org/docs/stable/data.html Example:: + >>> # Creates a Concept object named "striped", with a data_iter >>> # object to iterate over all files in "./concepts/striped" >>> concept_name = "striped" @@ -79,7 +80,7 @@ def __init__(self, model: Module) -> None: Args: - inputs (tensor or tuple of tensors): Inputs for which concept-based + inputs (Tensor or tuple of Tensor): Inputs for which concept-based interpretation scores are computed. It can be provided as a single tensor or a tuple of multiple tensors. If multiple input tensors are provided, the batch size (the first diff --git a/captum/concept/_core/tcav.py b/captum/concept/_core/tcav.py index 8b6c996856..64977901ef 100644 --- a/captum/concept/_core/tcav.py +++ b/captum/concept/_core/tcav.py @@ -37,11 +37,13 @@ def __init__(self, datasets: List[AV.AVDataset], labels: List[int]) -> None: However, __get_item__ not only returns a batch of activation vectors, but also a batch of labels indicating which concept that batch of activation vectors is associated with. + Args: + datasets (list[Dataset]): The k-th element of datasets is a Dataset representing activation vectors associated with the k-th concept - labels (list[Int]): The k-th element of labels is the integer label + labels (list[int]): The k-th element of labels is the integer label associated with the k-th concept """ assert len(datasets) == len( @@ -69,16 +71,17 @@ def _i_to_k(self, i): else: right = mid - def __getitem__(self, i): + def __getitem__(self, i: int): """ Returns a batch of activation vectors, as well as a batch of labels indicating which concept the batch of activation vectors is associated with. 
- args: + Args: + i (int): which (activation vector, label) batch in the dataset to return - returns: + Returns: inputs (Tensor): i-th batch in Dataset (representing activation vectors) labels (Tensor): labels of i-th batch in Dataset @@ -91,7 +94,7 @@ def __getitem__(self, i): labels = torch.tensor([self.labels[k]] * inputs.size(0), device=inputs.device) return inputs, labels - def __len__(self): + def __len__(self) -> int: """ returns the total number of batches in the labelled_dataset """ @@ -113,6 +116,7 @@ def train_cav( Please see the TCAV class documentation for further information. Args: + model_id (str): A unique identifier for the PyTorch model for which we would like to load the layer activations and train a model in order to compute CAVs. @@ -120,7 +124,7 @@ def train_cav( to train a classifier and learn decision boundaries between those concepts for each layer defined in the `layers` argument. - layers (str, list[str]): A list of layer names or a single layer + layers (str or list[str]): A list of layer names or a single layer name that is used to compute the activations of all concept examples per concept and train a classifier using those activations. @@ -203,7 +207,7 @@ class TCAV(ConceptInterpreter): This class implements ConceptInterpreter abstract class using an approach called Testing with Concept Activation Vectors (TCAVs), as described in the paper: - https://arxiv.org/pdf/1711.11279.pdf + https://arxiv.org/abs/1711.11279 TCAV scores for a given layer, a list of concepts and input example are computed using the dot product between prediction's layer @@ -251,9 +255,10 @@ def __init__( ) -> None: r""" Args: + model (Module): An instance of pytorch model that is used to compute layer activations and attributions. - layers (str, list[str]): A list of layer name(s) that are + layers (str or list[str]): A list of layer name(s) that are used for computing concept activations (cavs) and layer attributions. 
model_id (str, optional): A unique identifier for the PyTorch `model` @@ -275,7 +280,7 @@ def __init__( attribution algorithm. save_path (str, optional): The path for storing CAVs and Activation Vectors (AVs). - classifier_kwargs (any, optional): Additional arguments such as + classifier_kwargs (Any, optional): Additional arguments such as `test_split_ratio` that are passed to concept `classifier`. Examples:: @@ -342,7 +347,7 @@ def generate_activation(self, layers: Union[str, List], concept: Concept) -> Non the list of layer(s) `layers`. Args: - layers (str, list[str]): A list of layer names or a layer name + layers (str or list[str]): A list of layer names or a layer name that is used to compute layer activations for the specific `concept`. concept (Concept): A single Concept object that provides access @@ -403,6 +408,7 @@ def load_cavs( of concepts and layer. Args: + concepts (list[Concept]): A list of Concept objects for which we want to load the CAV. @@ -458,6 +464,7 @@ def compute_cavs( the argument. Args: + experimental_sets (list[list[Concept]]): A list of lists of concept instances for which the cavs will be computed. force_train (bool, optional): A flag that indicates whether to @@ -469,6 +476,7 @@ def compute_cavs( multi-processing, otherwise it will be performed sequentially in a single process. Default: None + Returns: cavs (dict) : A mapping of concept ids and layers to CAV objects. If CAVs for the concept_ids-layer pairs are present in the @@ -569,7 +577,8 @@ def interpret( scores for specific predictions and CAV vectors. Args: - inputs (tensor or tuple of tensors): Inputs for which predictions + + inputs (Tensor or tuple of Tensor): Inputs for which predictions are performed and attributions are computed. If model takes a single tensor as input, a single input tensor should be provided. @@ -581,7 +590,7 @@ def interpret( provided, the examples must be aligned appropriately. 
experimental_sets (list[list[Concept]]): A list of list of Concept instances. - target (int, tuple, tensor or list, optional): Output indices for + target (int, tuple, Tensor, or list, optional): Output indices for which attributions are computed (for classification cases, this is usually the target class). If the network returns a scalar value per example, @@ -617,6 +626,7 @@ def interpret( attribution algorithm's attribute method. This could be for example `n_steps` in case of integrated gradients. Default: None + Returns: results (dict): A dictionary of sign and magnitude -based tcav scores for each concept set per layer. diff --git a/captum/concept/_utils/classifier.py b/captum/concept/_utils/classifier.py index 5bdf605470..b8ba7d0a59 100644 --- a/captum/concept/_utils/classifier.py +++ b/captum/concept/_utils/classifier.py @@ -95,7 +95,7 @@ def weights(self) -> Tensor: C is the number of classes and F is the number of features. Returns: - weights (tensor): A torch Tensor with the weights resulting from + weights (Tensor): A torch Tensor with the weights resulting from the model training. """ pass @@ -189,10 +189,10 @@ def weights(self) -> Tensor: r""" This function returns a C x F tensor weights, where C is the number of classes and F is the number of features. - In case of binary classification, C = 2 othewise it is > 2. + In case of binary classification, C = 2 otherwise it is > 2. Returns: - weights (tensor): A torch Tensor with the weights resulting from + weights (Tensor): A torch Tensor with the weights resulting from the model training. 
""" assert self.lm.linear is not None, ( diff --git a/captum/concept/_utils/data_iterator.py b/captum/concept/_utils/data_iterator.py index 6a8a48f197..574bc71ae7 100644 --- a/captum/concept/_utils/data_iterator.py +++ b/captum/concept/_utils/data_iterator.py @@ -16,7 +16,7 @@ class CustomIterableDataset(IterableDataset): def __init__(self, transform_filename_to_tensor: Callable, path: str) -> None: r""" Args: - transform_filename_to_tensor (callable): Function to read a data + transform_filename_to_tensor (Callable): Function to read a data file from path and return a tensor from that file. path (str): Path to dataset files. This can be either a path to a directory or a file where input examples are stored. diff --git a/captum/influence/_core/similarity_influence.py b/captum/influence/_core/similarity_influence.py index 0fd21eedb7..db1484466e 100644 --- a/captum/influence/_core/similarity_influence.py +++ b/captum/influence/_core/similarity_influence.py @@ -82,7 +82,7 @@ def __init__( Args: module (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. - layers (str or List of str): The fully qualified layer(s) for which the + layers (str or list[str]): The fully qualified layer(s) for which the activation vectors are computed. influence_src_dataset (torch.utils.data.Dataset): PyTorch Dataset that is used to create a PyTorch Dataloader to iterate over the dataset and @@ -166,13 +166,13 @@ def influence( # type: ignore[override] ) -> Dict: r""" Args: - inputs (tensor or tuple of tensors): Batch of examples for which influential + inputs (Tensor or tuple of Tensor): Batch of examples for which influential instances are computed. They are passed to the forward_func. The first dimension in `inputs` tensor or tuple of tensors corresponds to the batch size. A tuple of tensors is only passed in if this is the input form that `module` accepts. 
top_k (int): The number of top-matching activations to return - additional_forward_args (optional): Additional arguments that will be + additional_forward_args (Any, optional): Additional arguments that will be passed to forward_func after inputs. load_src_from_disk (bool): Loads activations for `influence_src_dataset` where possible. Setting to False would force regeneration of diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py index 15811e684b..8cb2ac7bfc 100644 --- a/captum/influence/_core/tracincp.py +++ b/captum/influence/_core/tracincp.py @@ -45,7 +45,7 @@ Implements abstract DataInfluence class and provides implementation details for influence computation based on the logic provided in TracIn paper -(https://arxiv.org/pdf/2002.08484.pdf). +(https://arxiv.org/abs/2002.08484). The TracIn paper proposes an idealized notion of influence which can be represented by the total amount a training example reduces loss for a test example via a training @@ -105,9 +105,10 @@ def __init__( ) -> None: r""" Args: + model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. - train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.data.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -131,7 +132,7 @@ def __init__( `model` accepts `L-1` arguments, and the last element of `batch` is the label. In other words, `model(*batch[:-1])` gives the output of `model`, and `batch[-1]` are the labels for the batch. 
- checkpoints (str or List of str or Iterator): Either the directory of the + checkpoints (str, list[str], or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which returns objects from which to load checkpoints. @@ -140,7 +141,7 @@ def __init__( learning rate if it is saved. By default uses a utility to load a model saved as a state dict. Default: _load_flexible_state_dict - layers (List of str or None, optional): A list of layer names for which + layers (list[str] or None, optional): A list of layer names for which gradients should be computed. If `layers` is None, gradients will be computed for all layers. Otherwise, they will only be computed for the layers specified in `layers`. @@ -215,7 +216,7 @@ def self_influence( with are not too large, so that there will not be an out-of-memory error. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -227,7 +228,7 @@ def self_influence( more details on the assumed structure of a batch. show_progress (bool, optional): Computation of self influence scores can take a long time if `inputs_dataset` represents many examples. If - `show_progress`is true, the progress of this computation will be + `show_progress` is true, the progress of this computation will be displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) in an outer loop, and iterate over all batches that @@ -261,9 +262,10 @@ def _get_k_most_influential( ) -> KMostInfluentialResults: r""" Args: - inputs (Tuple of Any): A tuple that represents a batch of examples. 
It does + + inputs (tuple of Any): A tuple that represents a batch of examples. It does not represent labels, which are passed as `targets`. - targets (tensor, optional): If computing influence scores on a loss + targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None k (int, optional): The number of proponents or opponents to return per test @@ -274,7 +276,7 @@ def _get_k_most_influential( Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `train_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -309,23 +311,24 @@ def _influence( ) -> Tensor: r""" Args: - inputs (Tuple of Any): A batch of examples. Does not represent labels, + + inputs (tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. - targets (tensor, optional): If computing influence scores on a loss + targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None Returns: - influence_scores (tensor): Influence scores over the entire + influence_scores (Tensor): Influence scores over the entire training dataset `train_dataset`. Dimensionality is (inputs_batch_size, src_dataset_size). For example: influence_scores[i][j] = the influence score for the j-th training example to the i-th input example. show_progress (bool, optional): To compute the influence of examples in training dataset `train_dataset`, we compute the influence - of each batch. 
If `show_progress`is true, the progress of this + of each batch. If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which influence has been computed will be displayed. It will try to use tqdm if available for advanced features (e.g. time @@ -369,6 +372,7 @@ def influence( # type: ignore[override] opponent) on the test example. Args: + inputs (Any, optional): If not provided or `None`, the self influence mode will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential @@ -380,7 +384,7 @@ def influence( # type: ignore[override] `inputs` will need to be a tuple. In other words, `inputs` will be unpacked as an argument when passing to `model`. Default: None - targets (tensor, optional): If computing influence scores on a loss + targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None k (int, optional): If not provided or `None`, the influence score mode will @@ -399,7 +403,7 @@ def influence( # type: ignore[override] show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each batch in the training dataset `train_dataset`, which may - take a long time. If `show_progress`is true, the progress of + take a long time. If `show_progress` is true, the progress of "training dataset computations" will be displayed. In particular, the number of batches for which computations have been performed will be displayed. It will try to use tqdm if available for @@ -501,9 +505,10 @@ def __init__( ) -> None: r""" Args: + model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. 
- train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.data.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -527,7 +532,7 @@ def __init__( `model` accepts `L-1` arguments, and the last element of `batch` is the label. In other words, `model(*batch[:-1])` gives the output of `model`, and `batch[-1]` are the labels for the batch. - checkpoints (str or List of str or Iterator): Either the directory of the + checkpoints (str, list[str], or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which returns objects from which to load checkpoints. @@ -536,7 +541,7 @@ def __init__( learning rate if it is saved. By default uses a utility to load a model saved as a state dict. Default: _load_flexible_state_dict - layers (List of str or None, optional): A list of layer names for which + layers (list[str] or None, optional): A list of layer names for which gradients should be computed. If `layers` is None, gradients will be computed for all layers. Otherwise, they will only be computed for the layers specified in `layers`. @@ -698,6 +703,7 @@ def influence( # type: ignore[override] opponent) on the test example. Args: + inputs (Any, optional): If not provided or `None`, the self influence mode will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential @@ -709,7 +715,7 @@ def influence( # type: ignore[override] `inputs` will need to be a tuple. In other words, `inputs` will be unpacked as an argument when passing to `model`. 
Default: None - targets (tensor, optional): If computing influence scores on a loss + targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None k (int, optional): If not provided or `None`, the influence score mode will @@ -728,7 +734,7 @@ def influence( # type: ignore[override] show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each batch in the training dataset `train_dataset`, which may - take a long time. If `show_progress`is true, the progress of + take a long time. If `show_progress` is true, the progress of "training dataset computations" will be displayed. It will try to use tqdm if available for advanced features (e.g. time estimation). Otherwise, it will fallback to a simple output of progress. @@ -827,15 +833,16 @@ def _influence( output of `self._basic_computation_tracincp`. Args: - inputs (Tuple of Any): A test batch of examples. Does not represent labels, + + inputs (tuple of Any): A test batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. - targets (tensor, optional): If computing influence scores on a loss + targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Default: None show_progress (bool, optional): To compute the influence of examples in training dataset `train_dataset`, we compute the influence - of each batch. If `show_progress`is true, the progress of this + of each batch. If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which influence has been computed will be displayed. It will try to use tqdm if available for advanced features (e.g. 
time @@ -844,7 +851,7 @@ def _influence( Default: False Returns: - influence_scores (tensor): Influence scores from the TracInCP method. + influence_scores (Tensor): Influence scores from the TracInCP method. Its shape is `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and `train_dataset_size` is the number of examples in @@ -882,7 +889,8 @@ def _get_k_most_influential( ) -> KMostInfluentialResults: r""" Args: - inputs (Tuple of Any): A tuple that represents a batch of examples. It does + + inputs (tuple of Any): A tuple that represents a batch of examples. It does not represent labels, which are passed as `targets`. targets (Tensor, optional): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. @@ -895,7 +903,7 @@ def _get_k_most_influential( Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `train_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -964,7 +972,7 @@ def _self_influence_by_checkpoints( times. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -976,7 +984,7 @@ def _self_influence_by_checkpoints( more details on the assumed structure of a batch. show_progress (bool, optional): Computation of self influence scores can take a long time if `inputs_dataset` represents many examples. 
If - `show_progress`is true, the progress of this computation will be + `show_progress` is true, the progress of this computation will be displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) in an outer loop, and iterate over all batches that @@ -1126,7 +1134,7 @@ def self_influence( for each batch. For large models, loading checkpoints can be time-intensive. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -1178,7 +1186,8 @@ def _basic_computation_tracincp( and batches. Args: - inputs (Tuple of Any): A batch of examples, which could be a training batch + + inputs (tuple of Any): A batch of examples, which could be a training batch or test batch, depending which method is the caller. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py index 114d4c45bd..0a95a52dc1 100644 --- a/captum/influence/_core/tracincp_fast_rand_proj.py +++ b/captum/influence/_core/tracincp_fast_rand_proj.py @@ -36,7 +36,7 @@ r""" Implements abstract DataInfluence class and also provides implementation details for influence computation based on the logic provided in TracIn paper -(https://arxiv.org/pdf/2002.08484.pdf). +(https://arxiv.org/abs/2002.08484). The TracIn paper proposes an idealized notion of influence which can be represented by the total amount a training example reduces loss for a test example via a training @@ -92,6 +92,7 @@ def __init__( ) -> None: r""" Args: + model (torch.nn.Module): An instance of pytorch model. 
This model should define all of its layers as attributes of the model. final_fc_layer (torch.nn.Module or str): The last fully connected layer in @@ -99,7 +100,7 @@ def __init__( projection method. Can be either the layer module itself, or the fully qualified name of the layer if it is a defined attribute of the passed `model`. - train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.data.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -123,7 +124,7 @@ def __init__( `model` accepts `L-1` arguments, and the last element of `batch` is the label. In other words, `model(*batch[:-1])` gives the output of `model`, and `batch[-1]` are the labels for the batch. - checkpoints (str or List of str or Iterator): Either the directory of the + checkpoints (str, list[str], or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which returns objects from which to load checkpoints. @@ -236,6 +237,7 @@ def influence( # type: ignore[override] opponent) on the test example. Args: + inputs (Any, optional): If not provided or `None`, the self influence mode will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential @@ -247,7 +249,7 @@ def influence( # type: ignore[override] `inputs` will need to be a tuple. In other words, `inputs` will be unpacked as an argument when passing to `model`. Default: None - targets (tensor, optional): The labels corresponding to the batch `inputs`. + targets (Tensor, optional): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so `targets` is required, unless running in "self influence" mode. 
Default: None @@ -267,7 +269,7 @@ def influence( # type: ignore[override] show_progress (bool, optional): For all modes, computation of results requires "training dataset computations": computations for each batch in the training dataset `train_dataset`, which may - take a long time. If `show_progress`is true, the progress of + take a long time. If `show_progress` is true, the progress of "training dataset computations" will be displayed. It will try to use tqdm if available for advanced features (e.g. time estimation). Otherwise, it will fallback to a simple output of progress. @@ -281,7 +283,7 @@ def influence( # type: ignore[override] `train_dataset`. The length of this tensor is the number of examples in `train_dataset`, regardless of whether it is a Dataset or DataLoader. - - influence score mode: if this mode is run (`inputs is not None, `k` is + - influence score mode: if this mode is run (`inputs` is not None, `k` is None), returns a 2D tensor `influence_scores` of shape `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and @@ -377,15 +379,16 @@ def _influence( # type: ignore[override] output of `_basic_computation_tracincp_fast`. Args: - inputs (Tuple of Any): A batch of examples. Does not represent labels, + + inputs (tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. - targets (tensor): The labels corresponding to the batch `inputs`. This + targets (Tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. show_progress (bool, optional): To compute the influence of examples in training dataset `train_dataset`, we compute the influence - of each batch. If `show_progress`is true, the progress of this + of each batch. If `show_progress` is true, the progress of this computation will be displayed. 
In particular, the number of batches for which influence has been computed will be displayed. It will try to use tqdm if available for advanced features (e.g. time @@ -394,7 +397,7 @@ def _influence( # type: ignore[override] Default: False Returns: - influence_scores (tensor): Influence scores from the TracInCPFast method. + influence_scores (Tensor): Influence scores from the TracInCPFast method. Its shape is `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and `train_dataset_size` is the number of examples in @@ -434,9 +437,10 @@ def _get_k_most_influential( # type: ignore[override] ) -> KMostInfluentialResults: r""" Args: - inputs (Tuple of Any): A tuple that represents a batch of examples. It does + + inputs (tuple of Any): A tuple that represents a batch of examples. It does not represent labels, which are passed as `targets`. - targets (tensor): The labels corresponding to the batch `inputs`. This + targets (Tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. k (int, optional): The number of proponents or opponents to return per test @@ -447,7 +451,7 @@ def _get_k_most_influential( # type: ignore[override] Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `train_dataset`, If `show_progress`is + training dataset `train_dataset`, If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -516,7 +520,7 @@ def _self_influence_by_checkpoints( times. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. 
In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -528,7 +532,7 @@ def _self_influence_by_checkpoints( more details on the assumed structure of a batch. show_progress (bool, optional): Computation of self influence scores can take a long time if `inputs_dataset` represents many examples. If - `show_progress`is true, the progress of this computation will be + `show_progress` is true, the progress of this computation will be displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) in an outer loop, and iterate over all batches that @@ -660,7 +664,7 @@ def self_influence( for each batch. For large models, loading checkpoints can be time-intensive. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -713,17 +717,18 @@ def _basic_computation_tracincp_fast( and batches. Args: + influence_instance (TracInCPFast): A instance of TracInCPFast or its children. We assume `influence_instance` has a `loss_fn` attribute, i.e. the loss function applied to the output of the last fully-connected layer, as well as a `reduction_type` attribute, which indicates whether `loss_fn` reduces the per-example losses by using their mean or sum. The `reduction_type` attribute must either be "mean" or "sum". - inputs (Tuple of Any): A batch of examples, which could be a training batch + inputs (tuple of Any): A batch of examples, which could be a training batch or test batch, depending which method is the caller. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. 
- targets (tensor): If computing influence scores on a loss function, + targets (Tensor): If computing influence scores on a loss function, these are the labels corresponding to the batch `inputs`. Returns: @@ -809,7 +814,7 @@ class TracInCPFastRandProj(TracInCPFast): to obtain proponents / opponents or influence scores will be made in an "interactive" manner, and there is sufficient memory to store vectors for the entire `train_dataset`. This is because in order to enable interactive - analysis, this implementation incures overhead in ``__init__` to setup the + analysis, this implementation incures overhead in `__init__` to setup the nearest-neighbors data structure, which is both time and memory intensive, as vectors corresponding to all training examples needed to be stored. To reduce memory usage, this implementation enables random projections of those vectors. @@ -853,6 +858,7 @@ def __init__( ) -> None: r""" Args: + model (torch.nn.Module): An instance of pytorch model. This model should define all of its layers as attributes of the model. final_fc_layer (torch.nn.Module or str): The last fully connected layer in @@ -860,7 +866,7 @@ def __init__( projection method. Can be either the layer module itself, or the fully qualified name of the layer if it is a defined attribute of the passed `model`. - train_dataset (torch.utils.data.Dataset or torch.utils.DataLoader): + train_dataset (torch.utils.data.Dataset or torch.utils.data.DataLoader): In the `influence` method, we either compute the influence score of training examples on examples in a test batch, or self influence scores for those training examples, depending on which mode is used. @@ -884,7 +890,7 @@ def __init__( `model` accepts `L-1` arguments, and the last element of `batch` is the label. In other words, `model(*batch[:-1])` gives the output of `model`, and `batch[-1]` are the labels for the batch. 
- checkpoints (str or List of str or Iterator): Either the directory of the + checkpoints (str, list[str], or Iterator): Either the directory of the path to store and retrieve model checkpoints, a list of filepaths with checkpoints from which to load, or an iterator which returns objects from which to load checkpoints. @@ -935,7 +941,7 @@ def __init__( int, and random projection will be performed to ensure that the vector is of dimension no more than `projection_dim` * C. `projection_dim` corresponds to the variable d in the top of page - 15 of the TracIn paper: https://arxiv.org/pdf/2002.08484.pdf. + 15 of the TracIn paper: https://arxiv.org/abs/2002.08484. Default: None seed (int, optional): Because this implementation chooses a random projection, its output is random. Setting this seed specifies the @@ -995,15 +1001,16 @@ def _influence( # type: ignore[override] ) -> Tensor: r""" Args: + inputs (tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. The assumption is that `model(*inputs)` produces the predictions for the batch. - targets (tensor): The labels corresponding to the batch `inputs`. This + targets (Tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. Returns: - influence_scores (tensor): Influence scores from the + influence_scores (Tensor): Influence scores from the TracInCPFastRandProj method. Its shape is `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and `train_dataset_size` is @@ -1034,9 +1041,10 @@ def _get_k_most_influential( # type: ignore[override] ) -> KMostInfluentialResults: r""" Args: - inputs (Tuple of Any): A tuple that represents a batch of examples. It does + + inputs (tuple of Any): A tuple that represents a batch of examples. It does not represent labels, which are passed as `targets`. - targets (tensor): The labels corresponding to the batch `inputs`. 
This + targets (Tensor): The labels corresponding to the batch `inputs`. This method is designed to be applied for a loss function, so labels are required. k (int, optional): The number of proponents or opponents to return per test @@ -1101,7 +1109,7 @@ def self_influence( with are not too large, so that there will not be an out-of-memory error. Args: - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. That is, @@ -1113,7 +1121,7 @@ def self_influence( more details on the assumed structure of a batch. show_progress (bool, optional): Computation of self influence scores can take a long time if `inputs_dataset` represents many examples. If - `show_progress`is true, the progress of this computation will be + `show_progress` is true, the progress of this computation will be displayed. In more detail, this computation will iterate over all checkpoints (provided as the `checkpoints` initialization argument) and all batches that `inputs_dataset` represents. Therefore, the @@ -1187,6 +1195,7 @@ def influence( # type: ignore[override] gradients in the last fully-connected layer, please use `TracInCPFast` instead. Args: + inputs (Any, optional): If not provided or `None`, the self influence mode will be run. Otherwise, `inputs` is the test batch that will be used when running in either influence score or k-most influential @@ -1198,7 +1207,7 @@ def influence( # type: ignore[override] `inputs` will need to be a tuple. In other words, `inputs` will be unpacked as an argument when passing to `model`. Default: None - targets (tensor): The labels corresponding to the batch `inputs`. This + targets (Tensor): The labels corresponding to the batch `inputs`. 
This method is designed to be applied for a loss function, so `targets` is required. k (int, optional): If not provided or `None`, the influence score mode will @@ -1219,7 +1228,7 @@ def influence( # type: ignore[override] The return value of this method depends on which mode is run. - - influence score mode: if this mode is run (`inputs is not None, `k` is + - influence score mode: if this mode is run (`inputs` is not None, `k` is None), returns a 2D tensor `influence_scores` of shape `(input_size, train_dataset_size)`, where `input_size` is the number of examples in the test batch, and @@ -1275,6 +1284,7 @@ def _set_projections_tracincp_fast_rand_proj( `TracInCPFastRandProj.__init__`. Args: + dataloader (DataLoader): determining the projection requires knowing the dimensionality of the last layer's parameters (`jacobian_dim` below) and its input (`layer_input_dim` below). These are @@ -1282,10 +1292,10 @@ def _set_projections_tracincp_fast_rand_proj( provides that batch. Returns: - jacobian_projection (tensor or None): Projection matrix to apply to + jacobian_projection (Tensor or None): Projection matrix to apply to Jacobian of last layer to reduce its dimension, if needed. None otherwise. - input_projection (tensor or None): Projection matrix to apply to input of + input_projection (Tensor or None): Projection matrix to apply to input of last layer to reduce its dimension, if needed. None otherwise. """ # figure out projection dimensions, if needed @@ -1326,7 +1336,7 @@ def _set_projections_tracincp_fast_rand_proj( # allowable dimension of the "partial" intermediate quantity. Therefore, # we only project if `jacobian_dim` * `layer_input_dim` > `projection_dim`. # `projection_dim` corresponds to the variable d in the top of page 15 of - # the TracIn paper: https://arxiv.org/pdf/2002.08484.pdf. + # the TracIn paper: https://arxiv.org/abs/2002.08484. 
if jacobian_dim * layer_input_dim > projection_dim: jacobian_projection_dim = min(int(projection_dim**0.5), jacobian_dim) layer_input_projection_dim = min( @@ -1361,7 +1371,8 @@ def _process_src_intermediate_quantities_tracincp_fast_rand_proj( method creates that data structure. This method has side effects. Args: - src_intermediate_quantities (tensor): the output of the + + src_intermediate_quantities (Tensor): the output of the `_get_intermediate_quantities_tracin_fast_rand_proj` function when applied to training dataset `train_dataset`. This output is the vector representation of all training examples. @@ -1401,7 +1412,7 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( projection is to be applied. Returns: - intermediate_quantities (tensor): A tensor of dimension + intermediate_quantities (Tensor): A tensor of dimension (N, D * C), where N is total number of examples in `dataloader`, C is the number of checkpoints passed as the `checkpoints` argument of `TracInCPFastRandProj.__init__`, and each row represents the @@ -1417,7 +1428,7 @@ def _get_intermediate_quantities_tracincp_fast_rand_proj( performed to ensure that the vector is of dimension no more than `self.projection_dim` * C. `self.projection_dim` corresponds to the variable d in the top of page 15 of the TracIn paper: - https://arxiv.org/pdf/2002.08484.pdf. + https://arxiv.org/abs/2002.08484. """ # if `inputs_dataset` is not a `DataLoader`, turn it into one. inputs_dataset = _format_inputs_dataset(inputs_dataset) @@ -1547,7 +1558,7 @@ def compute_intermediate_quantities( structure of a batch. 
Returns: - intermediate_quantities (tensor): A tensor of dimension + intermediate_quantities (Tensor): A tensor of dimension (N, D * C), where N is total number of examples in `inputs_dataset`, C is the number of checkpoints passed as the `checkpoints` argument of `TracInCPFastRandProj.__init__`, and each diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py index cd989098c8..356f09b8e6 100644 --- a/captum/influence/_utils/common.py +++ b/captum/influence/_utils/common.py @@ -91,12 +91,12 @@ def _jacobian_loss_wrt_inputs( torch.nn.Module. If a custom loss is provided, it can be either type, but must behave as a library loss function would if `reduction='sum'` or `reduction='mean'`. - out (tensor): This is a tensor that represents the batch of inputs to + out (Tensor): This is a tensor that represents the batch of inputs to `loss_fn`. In practice, this will be the output of a model; this is why this argument is named `out`. `out` is a 2D tensor of shape (batch size, model output dimensionality). We will call `loss_fn` via `loss_fn(out, targets)`. - targets (tensor): The labels for the batch of inputs. + targets (Tensor): The labels for the batch of inputs. vectorize (bool): Flag to use experimental vectorize functionality for `torch.autograd.functional.jacobian`. reduction_type (str): The type of reduction used by `loss_fn`. If `loss_fn` @@ -104,7 +104,7 @@ def _jacobian_loss_wrt_inputs( only be "mean" or "sum". Returns: - jacobians (tensor): Returns the jacobian of the per-sample loss (implicitly + jacobians (Tensor): Returns the jacobian of the per-sample loss (implicitly defined by `loss_fn` and `reduction_type`) w.r.t each sample in the batch represented by `out`. This is a 2D tensor, where the first dimension is the batch dimension. @@ -153,8 +153,9 @@ def _load_flexible_state_dict(model: Module, path: str) -> float: state_dict and other information. 
Args: - model: The model for which to load a checkpoint - path: The filepath to the checkpoint + + model (torch.nn.Module): The model for which to load a checkpoint + path (str): The filepath to the checkpoint The module state_dict is modified in-place, and the learning rate is returned. """ @@ -203,7 +204,7 @@ def _get_k_most_influential_helper( influence_batch_fn (Callable): A callable that will be called via `influence_batch_fn(inputs, targets, batch)`, where `batch` is a batch in the `influence_src_dataloader` argument. - inputs (Tuple of Any): A batch of examples. Does not represent labels, + inputs (tuple of Any): A batch of examples. Does not represent labels, which are passed as `targets`. targets (Tensor, optional): If computing TracIn scores on a loss function, these are the labels corresponding to the batch `inputs`. @@ -216,7 +217,7 @@ def _get_k_most_influential_helper( Default: True show_progress (bool, optional): To compute the proponents (or opponents) for the batch of examples, we perform computation for each batch in - training dataset `influence_src_dataloader`, If `show_progress`is + training dataset `influence_src_dataloader`, If `show_progress` is true, the progress of this computation will be displayed. In particular, the number of batches for which the computation has been performed will be displayed. It will try to use tqdm if @@ -354,7 +355,7 @@ def _self_influence_by_batches_helper( instance_name (str): This is the name of the implementation class that `self_influence_batch_fn` is a method of. This is used for displaying warning messages. - batches (Tuple, or DataLoader): Either a single tuple of any, or a + batches (tuple or DataLoader): Either a single tuple of any, or a `DataLoader`, where each batch yielded is a tuple of any. In either case, the tuple represents a single batch, where the last element is assumed to be the labels for the batch. 
That is, diff --git a/captum/influence/_utils/nearest_neighbors.py b/captum/influence/_utils/nearest_neighbors.py index 3ecd452de3..fa8d6d7136 100644 --- a/captum/influence/_utils/nearest_neighbors.py +++ b/captum/influence/_utils/nearest_neighbors.py @@ -34,7 +34,7 @@ def get_nearest_neighbors( so that `query` is 2D. Args: - query (tensor): tensor representing the batch of tensors for which k-nearest + query (Tensor): tensor representing the batch of tensors for which k-nearest neighbors are desired. `query` is of shape (N, *), where N is the size of the batch, i.e. the 0-th dimension of `query` indexes the batch. * denotes an arbitrary shape, so that each tensor in the @@ -68,7 +68,7 @@ def setup(self, data: torch.Tensor) -> None: dimension indexes the tensors in the stored tensors. Args: - data (tensor): A tensor of shape (N, *) representing the stored tensors. + data (Tensor): A tensor of shape (N, *) representing the stored tensors. The 0-th dimension indexes the tensors in the stored tensors, so that `data[i]` is the tensor with index `i`. The nearest neighbors of a query will be referred to by their index. @@ -129,7 +129,7 @@ def setup(self, data: torch.Tensor) -> None: tensors. Args: - data (tensor): A tensor of shape (N, *) representing the stored tensors. + data (Tensor): A tensor of shape (N, *) representing the stored tensors. The 0-th dimension indexes the tensors in the stored tensors, so that `data[i]` is the tensor with index `i`. The nearest neighbors of a query will be referred to by their index. @@ -160,7 +160,7 @@ def get_nearest_neighbors( dot-product of the flattened version of tensors. Args: - query (tensor): tensor representing the batch of tensors for which k-nearest + query (Tensor): tensor representing the batch of tensors for which k-nearest neighbors are desired. `query` is of shape (N, *), where N is the size of the batch, i.e. the 0-th dimension of `query` indexes the batch. 
* denotes an arbitrary shape, so that each tensor in the diff --git a/captum/insights/__init__.py b/captum/insights/__init__.py index 48ba6fdfa0..2ba766cdd2 100644 --- a/captum/insights/__init__.py +++ b/captum/insights/__init__.py @@ -1 +1 @@ -from captum.insights.attr_vis import AttributionVisualizer, Batch # noqa +from captum.insights.attr_vis import AttributionVisualizer, Batch, features # noqa diff --git a/captum/insights/attr_vis/app.py b/captum/insights/attr_vis/app.py index 9a0433090b..fe7e0bbcda 100644 --- a/captum/insights/attr_vis/app.py +++ b/captum/insights/attr_vis/app.py @@ -108,7 +108,7 @@ def __init__( Args: - inputs (tensor or tuple of tensors): Batch of inputs for a model. + inputs (Tensor or tuple of Tensor): Batch of inputs for a model. These may be either a Tensor or tuple of tensors. Each tensor must correspond to a feature for AttributionVisualizer, and the corresponding input transform function of the feature @@ -116,7 +116,7 @@ def __init__( model. It is assumed that the first dimension of each input tensor corresponds to the number of examples (batch size) and is aligned for all input tensors. - labels (tensor): Tensor containing correct labels for input examples. + labels (Tensor): Tensor containing correct labels for input examples. This must be a 1D tensor with length matching the first dimension of each input tensor. additional_args (tuple, optional): If the forward function @@ -149,11 +149,11 @@ def __init__( r""" Args: - models (torch.nn.module): One or more PyTorch modules (models) for + models (torch.nn.Module): One or more PyTorch modules (models) for attribution visualization. - classes (list of string): List of strings corresponding to the names of + classes (list[str]): List of strings corresponding to the names of classes for classification. - features (list of BaseFeature): List of BaseFeatures, which correspond + features (list[BaseFeature]): List of BaseFeatures, which correspond to input arguments to the model. 
Each feature object defines relevant transformations for converting to model input, constructing baselines, and visualizing. The length of the @@ -163,10 +163,10 @@ def __init__( a single BaseFeature, while a multimodal classifier may provide a list of features, each corresponding to a different tensor input and potentially different modalities. - dataset (iterable of Batch): Defines the dataset to visualize attributions + dataset (Iterable of Batch): Defines the dataset to visualize attributions for. This must be an iterable of batch objects, each of which may contain multiple input examples. - score_func (callable, optional): This function is applied to the model + score_func (Callable, optional): This function is applied to the model output to obtain the score for each class. For instance, this function could be the softmax or final non-linearity of the network, applied to the model output. The indices @@ -175,7 +175,7 @@ def __init__( are taken directly and assumed to correspond to the class scores. Default: None - use_label_for_attr (boolean, optional): If true, the class index is passed + use_label_for_attr (bool, optional): If true, the class index is passed to the relevant attribution method. This is necessary in most cases where there is an output neuron corresponding to each class. When the model output is a scalar and class index diff --git a/captum/insights/attr_vis/features.py b/captum/insights/attr_vis/features.py index 0986170758..9a048e57a6 100644 --- a/captum/insights/attr_vis/features.py +++ b/captum/insights/attr_vis/features.py @@ -43,16 +43,16 @@ def __init__( name (str): The label of the specific feature. For example, an ImageFeature's name can be "Photo". - baseline_transforms (list, callable, optional): Optional list of + baseline_transforms (list, Callable, optional): Optional list of callables (e.g. functions) to be called on the input tensor to construct multiple baselines. Currently only one baseline is supported. 
See :py:class:`.IntegratedGradients` for more information about baselines. - input_transforms (list, callable, optional): Optional list of callables + input_transforms (list, Callable, optional): Optional list of callables (e.g. functions) called on the input tensor sequentially to convert it into the format expected by the model. - visualization_transform (callable, optional): Optional callable (e.g. + visualization_transform (Callable, optional): Optional callable (e.g. function) applied as a postprocessing step of the original input data (before ``input_transforms``) to convert it to a format to be understood by the frontend visualizer as @@ -89,16 +89,16 @@ def __init__( Args: name (str): The label of the specific feature. For example, an ImageFeature's name can be "Photo". - baseline_transforms (list, callable, optional): Optional list of + baseline_transforms (list, Callable, optional): Optional list of callables (e.g. functions) to be called on the input tensor to construct multiple baselines. Currently only one baseline is supported. See :py:class:`.IntegratedGradients` for more information about baselines. - input_transforms (list, callable, optional): A list of transforms + input_transforms (list, Callable, optional): A list of transforms or transform to be applied to the input. For images, normalization is often applied here. - visualization_transform (callable, optional): Optional callable (e.g. + visualization_transform (Callable, optional): Optional callable (e.g. function) applied as a postprocessing step of the original input data (before input_transforms) to convert it to a format to be visualized. @@ -164,7 +164,7 @@ def __init__( Args: name (str): The label of the specific feature. For example, an ImageFeature's name can be "Photo". - baseline_transforms (list, callable, optional): Optional list of + baseline_transforms (list, Callable, optional): Optional list of callables (e.g. 
functions) to be called on the input tensor to construct multiple baselines. Currently only one baseline is supported. See @@ -174,7 +174,7 @@ def __init__( corresponding to PAD with the same size as the input tensor. See :py:class:`.TokenReferenceBase` for more information. - input_transforms (list, callable, optional): A list of transforms + input_transforms (list, Callable, optional): A list of transforms or transform to be applied to the input. For text, a common transform is to convert the tokenized input tensor into an interpretable embedding. See @@ -182,7 +182,7 @@ def __init__( and :py:func:`~.configure_interpretable_embedding_layer` for more information. - visualization_transform (callable, optional): Optional callable (e.g. + visualization_transform (Callable, optional): Optional callable (e.g. function) applied as a postprocessing step of the original input data (before ``input_transforms``) to convert it to a suitable format for visualization. For text features, diff --git a/captum/metrics/_core/infidelity.py b/captum/metrics/_core/infidelity.py index 33f485a78e..a10b2e2812 100644 --- a/captum/metrics/_core/infidelity.py +++ b/captum/metrics/_core/infidelity.py @@ -44,12 +44,12 @@ def sub_infidelity_perturb_func_decorator(pertub_func: Callable) -> Callable: r""" Args: - pertub_func(callable): Input perturbation function that takes inputs + pertub_func(Callable): Input perturbation function that takes inputs and optionally baselines and returns perturbed inputs Returns: - default_perturb_func(callable): Internal default perturbation + default_perturb_func(Callable): Internal default perturbation function that computes the perturbations internally and returns perturbations and perturbed inputs. @@ -126,7 +126,7 @@ def infidelity( and the differences between the predictor function at its input and perturbed input. 
More details about the measure can be found in the following paper: - https://arxiv.org/pdf/1901.09392.pdf + https://arxiv.org/abs/1901.09392 It is derived from the completeness property of well-known attribution algorithms and is a computationally more efficient and generalized @@ -134,7 +134,7 @@ def infidelity( of the attributions and the differences of the predictor function at its input and fixed baseline. More details about the Sensitivity-n can be found here: - https://arxiv.org/pdf/1711.06104.pdfs + https://arxiv.org/abs/1711.06104 The users can perturb the inputs any desired way by providing any perturbation function that takes the inputs (and optionally baselines) @@ -147,10 +147,10 @@ def infidelity( Args: - forward_func (callable): + forward_func (Callable): The forward function of the model or any modification of it. - perturb_func (callable): + perturb_func (Callable): The perturbation function of model inputs. This function takes model inputs and optionally baselines as input arguments and returns either a tuple of perturbations and perturbed inputs or just @@ -205,12 +205,13 @@ def infidelity( Similar to previous case here as well we need to return only perturbed inputs in case `infidelity_perturb_func_decorator` decorates out `perturb_func`. + It is important to note that for performance reasons `perturb_func` isn't called for each example individually but on a batch of input examples that are repeated `max_examples_per_batch / batch_size` times within the batch. - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which attributions are computed. If forward_func takes a single tensor as input, a single input tensor should be provided. If forward_func takes multiple tensors as input, a tuple @@ -220,7 +221,7 @@ def infidelity( multiple input tensors are provided, the examples must be aligned appropriately. 
- baselines (scalar, tensor, tuple of scalars or tensors, optional): + baselines (scalar, Tensor, tuple of scalar, or Tensor, optional): Baselines define reference values which sometimes represent ablated values and are used to compare with the actual inputs to compute importance scores in attribution algorithms. They can be represented @@ -249,13 +250,13 @@ def infidelity( Default: None - attributions (tensor or tuple of tensors): + attributions (Tensor or tuple of Tensor): Attribution scores computed based on an attribution algorithm. This attribution scores can be computed using the implementations provided in the `captum.attr` package. Some of those attribution approaches are so called global methods, which means that they factor in model inputs' multiplier, as described in: - https://arxiv.org/pdf/1711.06104.pdf + https://arxiv.org/abs/1711.06104 Many global attribution algorithms can be used in local modes, meaning that the inputs multiplier isn't factored in the attribution scores. @@ -271,7 +272,7 @@ def infidelity( For local attributions we can use real-valued perturbations whereas for global attributions that perturbation is binary. - https://arxiv.org/pdf/1901.09392.pdf + https://arxiv.org/abs/1901.09392 If we want to compute the infidelity of global attributions we can use a binary perturbation matrix that will allow us to select @@ -291,7 +292,7 @@ def infidelity( tensor as well. If inputs is provided as a tuple of tensors then attributions will be tuples of tensors as well. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. It must be either a single additional @@ -304,7 +305,7 @@ def infidelity( being passed to `perturb_func` as an input argument. 
Default: None - target (int, tuple, tensor or list, optional): Indices for selecting + target (int, tuple, Tensor, or list, optional): Indices for selecting predictions from output(for classification cases, this is usually the target class). If the network returns a scalar value per example, no target @@ -365,7 +366,7 @@ def infidelity( Default: False Returns: - infidelities (tensor): A tensor of scalar infidelity scores per + infidelities (Tensor): A tensor of scalar infidelity scores per input example. The first dimension is equal to the number of examples in the input batch and the second dimension is one. diff --git a/captum/metrics/_core/sensitivity.py b/captum/metrics/_core/sensitivity.py index 77d87e6291..f0c841a5a1 100644 --- a/captum/metrics/_core/sensitivity.py +++ b/captum/metrics/_core/sensitivity.py @@ -30,8 +30,8 @@ def default_perturb_func( Args: - inputs (tensor or a tuple of tensors): The input tensors that we'd - like to perturb by adding a random noise sampled unifromly + inputs (Tensor or tuple of Tensor): The input tensors that we'd + like to perturb by adding a random noise sampled uniformly random from an L_infinity ball with a radius `perturb_radius`. radius (float): A radius used for sampling from @@ -39,8 +39,8 @@ def default_perturb_func( Returns: - perturbed_input (tuple(tensor)): A list of perturbed inputs that - are createed by adding noise sampled uniformly random + perturbed_input (tuple of Tensor): A list of perturbed inputs that + are created by adding noise sampled uniformly random from L_infiniy ball with a radius `perturb_radius` to the original inputs. 
@@ -90,7 +90,7 @@ def sensitivity_max( More about the Lipschitz Continuity Metric can also be found here `On the Robustness of Interpretability Methods` - https://arxiv.org/pdf/1806.08049.pdf + https://arxiv.org/abs/1806.08049 and `Towards Robust Interpretability with Self-Explaining Neural Networks` https://papers.nips.cc/paper\ @@ -99,16 +99,16 @@ def sensitivity_max( More details about sensitivity max can be found here: `On the (In)fidelity and Sensitivity of Explanations` - https://arxiv.org/pdf/1901.09392.pdf + https://arxiv.org/abs/1901.09392 Args: - explanation_func (callable): + explanation_func (Callable): This function can be the `attribute` method of an attribution algorithm or any other explanation method that returns the explanations. - inputs (tensor or tuple of tensors): Input for which + inputs (Tensor or tuple of Tensor): Input for which explanations are computed. If `explanation_func` takes a single tensor as input, a single input tensor should be provided. @@ -119,7 +119,7 @@ def sensitivity_max( multiple input tensors are provided, the examples must be aligned appropriately. - perturb_func (callable): + perturb_func (Callable): The perturbation function of model inputs. This function takes model inputs and optionally `perturb_radius` if the function takes more than one argument and returns @@ -138,7 +138,7 @@ def sensitivity_max( perturb_radius (float, optional): The epsilon radius used for sampling. In the `default_perturb_func` it is used as the radius of the L-Infinity ball. In a general case it can serve as a radius of - any L_p nom. + any L_p norm. This argument is passed to `perturb_func` if it takes more than one argument. @@ -149,10 +149,12 @@ def sensitivity_max( `perturb_func` function. 
Default: 10 - norm_ord (int, float, inf, -inf, 'fro', 'nuc', optional): The type of norm - that is used to compute the - norm of the sensitivity matrix which is defined as the difference - between the explanation function at its input and perturbed input. + norm_ord (int, float, or str, optional): The type of norm that is used to + compute the norm of the sensitivity matrix which is defined as the + difference between the explanation function at its input and perturbed + input. Acceptable values are either a string of 'fro' or 'nuc', or a + number in the range of [-inf, inf] (including float("-inf") & + float("inf")). Default: 'fro' max_examples_per_batch (int, optional): The number of maximum input @@ -176,7 +178,7 @@ def sensitivity_max( Returns: - sensitivities (tensor): A tensor of scalar sensitivity scores per + sensitivities (Tensor): A tensor of scalar sensitivity scores per input example. The first dimension is equal to the number of examples in the input batch and the second dimension is one. Returned sensitivities are normalized by diff --git a/captum/metrics/_utils/batching.py b/captum/metrics/_utils/batching.py index ee3b38f58e..83a773bda3 100644 --- a/captum/metrics/_utils/batching.py +++ b/captum/metrics/_utils/batching.py @@ -28,9 +28,9 @@ def _divide_and_aggregate_metrics( attributions for. n_perturb_samples (int): The number of samples per example that are used for perturbation purposes for example. - metric_func (callable): This function takes the number of samples per + metric_func (Callable): This function takes the number of samples per input batch and returns an overall metric for each example. - agg_func (callable, optional): This function is used to aggregate the + agg_func (Callable, optional): This function is used to aggregate the metrics across multiple sub-batches and that are generated by `metric_func`. 
max_examples_per_batch (int, optional): The maximum number of allowed examples @@ -38,7 +38,7 @@ def _divide_and_aggregate_metrics( Returns: - metric (tensor): A metric score estimated by `metric_func` per + metric (Tensor): A metric score estimated by `metric_func` per input example. """ bsz = inputs[0].size(0) diff --git a/captum/robust/_core/fgsm.py b/captum/robust/_core/fgsm.py index f717481ccd..0e42d08c37 100644 --- a/captum/robust/_core/fgsm.py +++ b/captum/robust/_core/fgsm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -from typing import Any, Callable, Tuple +from typing import Any, Callable, Optional, Tuple import torch from captum._utils.common import ( @@ -21,37 +21,44 @@ class FGSM(Perturbation): r""" - Fast Gradient Sign Method is an one-step method that can generate - adversarial examples. For non-targeted attack, the formulation is - x' = x + epsilon * sign(gradient of L(theta, x, y)). - For targeted attack on t, the formulation is - x' = x - epsilon * sign(gradient of L(theta, x, t)). - L(theta, x, y) is the model's loss function with respect to model + Fast Gradient Sign Method is a one-step method that can generate + adversarial examples. + + For non-targeted attack, the formulation is:: + + x' = x + epsilon * sign(gradient of L(theta, x, y)) + + For targeted attack on t, the formulation is:: + + x' = x - epsilon * sign(gradient of L(theta, x, t)) + + ``L(theta, x, y)`` is the model's loss function with respect to model parameters, inputs and labels. 
More details on Fast Gradient Sign Method can be found in the original - paper: - https://arxiv.org/pdf/1412.6572.pdf + paper: https://arxiv.org/abs/1412.6572 """ def __init__( self, forward_func: Callable, - loss_func: Callable = None, + loss_func: Optional[Callable] = None, lower_bound: float = float("-inf"), upper_bound: float = float("inf"), ) -> None: r""" Args: - forward_func (callable): The pytorch model for which the attack is + forward_func (Callable): The pytorch model for which the attack is computed. - loss_func (callable, optional): Loss function of which the gradient + loss_func (Callable, optional): Loss function of which the gradient computed. The loss function should take in outputs of the model and labels, and return a loss tensor. The default loss function is negative log. lower_bound (float, optional): Lower bound of input values. + Default: ``float("-inf")`` upper_bound (float, optional): Upper bound of input values. e.g. image pixels must be in the range 0-255 + Default: ``float("inf")`` Attributes: bound (Callable): A function that bounds the input values based on @@ -80,13 +87,13 @@ def perturb( Args: - inputs (tensor or tuple of tensors): Input for which adversarial + inputs (Tensor or tuple of Tensor): Input for which adversarial attack is computed. It can be provided as a single tensor or a tuple of multiple tensors. If multiple input tensors are provided, the batch sizes must be - aligned accross all tensors. + aligned across all tensors. epsilon (float): Step size of perturbation. - target (any): True labels of inputs if non-targeted attack is + target (Any): True labels of inputs if non-targeted attack is desired. Target class of inputs if targeted attack is desired. Target will be passed to the loss function to compute loss, so the type needs to match the @@ -112,7 +119,8 @@ def perturb( examples in inputs (dim 0), and each tuple containing #output_dims - 1 elements. Each tuple is applied as the label for the corresponding example. 
- additional_forward_args (any, optional): If the forward function + + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. These arguments are provided to @@ -124,7 +132,7 @@ def perturb( Returns: - - **perturbed inputs** (*tensor* or tuple of *tensors*): + - **perturbed inputs** (*Tensor* or tuple of *Tensor*): Perturbed input for each input tensor. The perturbed inputs have the same shape and dimensionality as the inputs. @@ -167,7 +175,7 @@ def _perturb( r""" A helper function to calculate the perturbed inputs given original inputs, gradient of loss function and epsilon. The calculation is - different for targetd v.s. non-targeted as described above. + different for targeted v.s. non-targeted as described above. """ multiplier = -1 if targeted else 1 inputs = tuple( diff --git a/captum/robust/_core/metrics/attack_comparator.py b/captum/robust/_core/metrics/attack_comparator.py index b9ebb59ad6..7964711883 100644 --- a/captum/robust/_core/metrics/attack_comparator.py +++ b/captum/robust/_core/metrics/attack_comparator.py @@ -60,15 +60,15 @@ def __init__( self, forward_func: Callable, metric: Callable[..., MetricResultType], - preproc_fn: Callable = None, + preproc_fn: Optional[Callable] = None, ) -> None: r""" Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of a model's forward function. - metric (callable): This function is applied to the model output in + metric (Callable): This function is applied to the model output in order to compute the desired performance metric or metrics. This function should have the following signature:: @@ -85,9 +85,10 @@ def __init__( If tensor metrics represent results for the full batch, the size of the first dimension should be 1. 
- preproc_fn (callable, optional): Optional method applied to inputs. Output + preproc_fn (Callable, optional): Optional method applied to inputs. Output of preproc_fn is then provided as input to model, in addition to additional_forward_args provided to evaluate. + Default: ``None`` """ self.forward_func = forward_func self.metric: Callable = metric @@ -113,7 +114,8 @@ def add_attack( Adds attack to be evaluated when calling evaluate. Args: - attack (perturbation or callable): This can either be an instance + + attack (Perturbation or Callable): This can either be an instance of a Captum Perturbation / Attack or any other perturbation or attack function such as a torchvision transform. @@ -121,23 +123,29 @@ def add_attack( name (str, optional): Name or identifier for attack, used as key for attack results. This defaults to attack.__class__.__name__ if not provided and must be unique for all added attacks. + Default: ``None`` - num_attempts (int): Number of attempts that attack should be + num_attempts (int, optional): Number of attempts that attack should be repeated. This should only be set to > 1 for non-deterministic attacks. The minimum, maximum, and average (best, worst, and average case) are tracked for attack attempts. - - apply_before_preproc (bool): Defines whether attack should be applied - before or after preproc function. - - attack_kwargs (dict): Additional arguments to be provided to given attack. - This should be provided as a dictionary of keyword arguments. - - additional_attack_arg_names (list[str]): Any additional arguments for the - attack which are specific to the particular input example or batch. - An example of this is target, which is necessary for some attacks such - as FGSM or PGD. These arguments are included if provided as a kwarg - to evaluate. + Default: ``1`` + + apply_before_preproc (bool, optional): Defines whether attack should be + applied before or after preproc function. 
+ Default: ``True`` + + attack_kwargs (dict, optional): Additional arguments to be provided to + given attack. This should be provided as a dictionary of keyword + arguments. + Default: ``None`` + + additional_attack_arg_names (list[str], optional): Any additional + arguments for the attack which are specific to the particular input + example or batch. An example of this is target, which is necessary + for some attacks such as FGSM or PGD. These arguments are included + if provided as a kwarg to evaluate. + Default: ``None`` """ if name is None: name = attack.__class__.__name__ @@ -239,7 +247,7 @@ def evaluate( Args: - inputs (any): Input for which attack metrics + inputs (Any): Input for which attack metrics are computed. It can be provided as a tensor, tuple of tensors, or any raw input type (e.g. PIL image or text string). This input is provided directly as input to preproc function as well @@ -247,7 +255,7 @@ def evaluate( function is provided, this input is provided directly to the main model and all attacks. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the preprocessing outputs (or inputs if preproc_fn is None), this argument can be provided. It must be either a single additional @@ -259,8 +267,8 @@ def evaluate( For a tensor, the first dimension of the tensor must correspond to the number of examples. For all other types, the given argument is used for all forward evaluations. - Default: None - perturbations_per_eval (int, optional): Allows perturbations of multiple + Default: ``None`` + perturbations_per_eval (int, optional): Allows perturbations of multiple attacks to be grouped and evaluated in one call of forward_fn Each forward pass will contain a maximum of perturbations_per_eval * #examples samples. 
@@ -272,9 +280,10 @@ def evaluate( In order to apply this functionality, the output of preproc_fn (or inputs itself if no preproc_fn is provided) must be a tensor or tuple of tensors. - Default: 1 - kwargs (any, optional): Additional keyword arguments provided to metric function - as well as selected attacks based on chosen additional_args + Default: ``1`` + kwargs (Any, optional): Additional keyword arguments provided to metric + function as well as selected attacks based on chosen additional_args. + Default: ``None`` Returns: diff --git a/captum/robust/_core/metrics/min_param_perturbation.py b/captum/robust/_core/metrics/min_param_perturbation.py index 99308727e4..95b2897a08 100644 --- a/captum/robust/_core/metrics/min_param_perturbation.py +++ b/captum/robust/_core/metrics/min_param_perturbation.py @@ -63,7 +63,7 @@ def __init__( corresponding perturbed input. Args: - forward_func (callable or torch.nn.Module): This can either be an instance + forward_func (Callable or torch.nn.Module): This can either be an instance of pytorch model or any modification of a model's forward function. @@ -85,23 +85,23 @@ def __init__( arg_step (int, float): Minimum interval for increase of target variable. mode (str, optional): Mode for search of minimum attack value; - either 'linear' for linear search on variable, or 'binary' for + either ``linear`` for linear search on variable, or ``binary`` for binary search of variable - Default: 'linear' + Default: ``linear`` num_attempts (int, optional): Number of attempts or trials with given variable. This should only be set to > 1 for non-deterministic perturbation / attack functions - Default: 1 + Default: ``1`` - preproc_fn (callable, optional): Optional method applied to inputs. Output + preproc_fn (Callable, optional): Optional method applied to inputs. Output of preproc_fn is then provided as input to model, in addition to additional_forward_args provided to evaluate. 
- Default: None + Default: ``None`` apply_before_preproc (bool, optional): Defines whether attack should be applied before or after preproc function. - Default: False + Default: ``False`` correct_fn (Callable, optional): This determines whether the perturbed input leads to a correct or incorrect prediction. By default, this function @@ -114,13 +114,15 @@ def __init__( function must be provided which determines correctness. The first argument to this function must be the model out; - any additional arguments should be provided through correct_fn_kwargs. + any additional arguments should be provided through + ``correct_fn_kwargs``. This function should have the following signature: + def correct_fn(model_out: Tensor, **kwargs: Any) -> bool Method should return a boolean if correct (True) and incorrect (False). - Default: None (applies standard correct_fn for classification) + Default: ``None`` (applies standard correct_fn for classification) """ self.forward_func = forward_func self.attack = attack @@ -363,7 +365,7 @@ def evaluate( pre-processing function is provided, this input is provided directly to the main model and all attacks. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the preprocessing outputs (or inputs if preproc_fn is None), this argument can be provided. It must be either a single additional @@ -375,9 +377,9 @@ def evaluate( For a tensor, the first dimension of the tensor must correspond to the number of examples. For all other types, the given argument is used for all forward evaluations. - Default: None + Default: ``None`` target (TargetType): Target class for classification. This is required if - using the default correct_fn + using the default ``correct_fn``. 
perturbations_per_eval (int, optional): Allows perturbations of multiple attacks to be grouped and evaluated in one call of forward_fn @@ -391,10 +393,10 @@ def evaluate( In order to apply this functionality, the output of preproc_fn (or inputs itself if no preproc_fn is provided) must be a tensor or tuple of tensors. - Default: 1 - attack_kwargs (dictionary, optional): Optional dictionary of keyword + Default: ``1`` + attack_kwargs (dict, optional): Optional dictionary of keyword arguments provided to attack function - correct_fn_kwargs (dictionary, optional): Optional dictionary of keyword + correct_fn_kwargs (dict, optional): Optional dictionary of keyword arguments provided to correct function Returns: diff --git a/captum/robust/_core/perturbation.py b/captum/robust/_core/perturbation.py index 9eb6d53481..c47b02dd78 100644 --- a/captum/robust/_core/perturbation.py +++ b/captum/robust/_core/perturbation.py @@ -18,15 +18,15 @@ class Perturbation: Args: - inputs (tensor or tuple of tensors): Input for which adversarial attack + inputs (Tensor or tuple of Tensor): Input for which adversarial attack is computed. It can be provided as a single tensor or a tuple of multiple tensors. If multiple input tensors - are provided, the batch sizes must be aligned accross all + are provided, the batch sizes must be aligned across all tensors. Returns: - - **perturbed inputs** (*tensor* or tuple of *tensors*): + - **perturbed inputs** (*Tensor* or tuple of *Tensor*): Perturbed input for each input tensor. The perturbed inputs have the same shape and dimensionality as the inputs. 
diff --git a/captum/robust/_core/pgd.py b/captum/robust/_core/pgd.py index b14239c681..733cbcc488 100644 --- a/captum/robust/_core/pgd.py +++ b/captum/robust/_core/pgd.py @@ -31,8 +31,7 @@ class PGD(Perturbation): x_(t+1) = Clip_r(x_t - alpha * sign(gradient of L(theta, x, t))) More details on Projected Gradient Descent can be found in the original - paper: - https://arxiv.org/pdf/1706.06083.pdf + paper: https://arxiv.org/abs/1706.06083 """ def __init__( @@ -44,15 +43,17 @@ def __init__( ) -> None: r""" Args: - forward_func (callable): The pytorch model for which the attack is + forward_func (Callable): The pytorch model for which the attack is computed. - loss_func (callable, optional): Loss function of which the gradient + loss_func (Callable, optional): Loss function of which the gradient computed. The loss function should take in outputs of the model and labels, and return the loss for each input tensor. The default loss function is negative log. lower_bound (float, optional): Lower bound of input values. + Default: ``float("-inf")`` upper_bound (float, optional): Upper bound of input values. e.g. image pixels must be in the range 0-255 + Default: ``float("inf")`` Attributes: bound (Callable): A function that bounds the input values based on @@ -82,17 +83,17 @@ def perturb( Args: - inputs (tensor or tuple of tensors): Input for which adversarial + inputs (Tensor or tuple of Tensor): Input for which adversarial attack is computed. It can be provided as a single tensor or a tuple of multiple tensors. If multiple input tensors are provided, the batch sizes must be - aligned accross all tensors. + aligned across all tensors. radius (float): Radius of the neighbor ball centered around inputs. The perturbation should be within this range. step_size (float): Step size of each gradient step. step_num (int): Step numbers. It usually guarantees that the perturbation can reach the border. 
- target (any): True labels of inputs if non-targeted attack is + target (Any): True labels of inputs if non-targeted attack is desired. Target class of inputs if targeted attack is desired. Target will be passed to the loss function to compute loss, so the type needs to match the @@ -118,23 +119,23 @@ def perturb( examples in inputs (dim 0), and each tuple containing #output_dims - 1 elements. Each tuple is applied as the label for the corresponding example. - additional_forward_args (any, optional): If the forward function + additional_forward_args (Any, optional): If the forward function requires additional arguments other than the inputs for which attributions should not be computed, this argument can be provided. These arguments are provided to forward_func in order following the arguments in inputs. - Default: None. + Default: ``None`` targeted (bool, optional): If attack should be targeted. - Default: False. + Default: ``False`` random_start (bool, optional): If a random initialization is added to - inputs. Default: False. + inputs. Default: ``False`` norm (str, optional): Specifies the norm to calculate distance from - original inputs: 'Linf'|'L2'. - Default: 'Linf'. + original inputs: ``Linf`` | ``L2``. + Default: ``Linf`` Returns: - - **perturbed inputs** (*tensor* or tuple of *tensors*): + - **perturbed inputs** (*Tensor* or tuple of *Tensor*): Perturbed input for each input tensor. The perturbed inputs have the same shape and dimensionality as the inputs. 
diff --git a/docs/algorithms.md b/docs/attribution_algorithms.md similarity index 99% rename from docs/algorithms.md rename to docs/attribution_algorithms.md index b06a8aa5f1..f1d00a8f53 100644 --- a/docs/algorithms.md +++ b/docs/attribution_algorithms.md @@ -1,5 +1,5 @@ --- -id: algorithms +id: attribution_algorithms title: Algorithm Descriptions --- diff --git a/docs/contribution_guide.md b/docs/contribution_guide.md index f8aacf1c85..82e4f158a2 100644 --- a/docs/contribution_guide.md +++ b/docs/contribution_guide.md @@ -4,7 +4,7 @@ title: The Captum Contribution Process --- The Captum development process involves a healthy amount of open discussions between the core development team and the community. -Captum operates similar to most open source projects on GitHub. However, if you've never contributed to an open source project before, here is the basic process. +Captum operates similarly to most open source projects on GitHub. However, if you've never contributed to an open source project before, here is the basic process. 1. **Figure out what you're going to work on.** @@ -59,7 +59,7 @@ https://captum.ai/tutorials/Bert_SQUAD_Interpret https://captum.ai/tutorials/IMDB_TorchText_Interpret **Vision** -- We provide a sample toy model for CIFAR dataset and examples with ResNet model. +- We provide a sample toy model for the CIFAR dataset and examples with a ResNet model. https://captum.ai/tutorials/CIFAR_TorchVision_Interpret https://captum.ai/tutorials/Resnet_TorchVision_Interpret These would be great starting points for benchmarking. diff --git a/docs/extension/integrated_gradients.md b/docs/extension/integrated_gradients.md index 0a00fb0ad1..ebcca190ec 100644 --- a/docs/extension/integrated_gradients.md +++ b/docs/extension/integrated_gradients.md @@ -42,7 +42,7 @@ class ToyModel(nn.Module): Second, let's apply integrated gradients on the toy model's output layer using sample data. 
The code snippet below computes the attribution of output with respect to the inputs. -`attribute` method of `IntegratedGradients` class returns input attributions which +The `attribute` method of `IntegratedGradients` class returns input attributions which have the same size and dimensionality as the inputs and an approximation error which is computed based on the completeness property of the integrated gradients. Completeness property is one of the axioms that integrated gradients satisfies. @@ -114,7 +114,7 @@ class ToySoftmaxModel(nn.Module): Now, let's apply integrated gradients on the toy classification model defined above using inputs that contain a range of numbers. We also choose an arbitrary target class (target_class_index: 5) which we use to attribute our predictions to. -Similar to previous example the output of attribution is a tensor with the same +Similar to the previous example, the output of attribution is a tensor with the same dimensionality as the inputs and an approximation error computed based on the completeness property of integrated gradients. @@ -157,9 +157,9 @@ Now, let's look at a model that besides input tensors takes input arguments of other types. In practice this can be used to pass the sequence length or the word/token indices in a sequence of a text, for instance. The example below demonstrates how to use `additional_forward_args`. In this particular example -`additional_forward_args` represents single integer value. -Those arguments are passed as `additional_forward_args` to `attribute` method and -they will be passed to model's forward function followed by inputs in the oder +`additional_forward_args` represents a single integer value. +Those arguments are passed as `additional_forward_args` to the `attribute` method and +they will be passed to the model's forward function followed by inputs in the order provided in `additional_forward_args`. 
In the example below, we also demonstrate how to apply integrated gradients to a batch of samples. The first dimension of the input corresponds to the batch size. diff --git a/docs/faq.md b/docs/faq.md index de4e22ea4c..16bf59b54a 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -9,7 +9,7 @@ title: FAQ * [Are SmoothGrad or VarGrad supported in Captum?](#are-smoothgrad-or-vargrad-supported-in-captum) * [How do I use Captum with BERT models?](#how-do-i-use-captum-with-bert-models) * [My model inputs or outputs token indices, and when using Captum I see errors relating to gradients, how do I resolve this?](#my-model-inputs-or-outputs-token-indices-and-when-using-captum-i-see-errors-relating-to-gradients-how-do-i-resolve-this) -* [Can my model using functional non-linearities (E.g. nn.functional.ReLU) or reused modules be used with Captum?](#can-my-model-using-functional-non-linearities-eg-nnfunctionalrelu-or-reused-modules-be-used-with-captum) +* [Can my model use functional non-linearities (E.g. nn.functional.ReLU) or can reused modules be used with Captum?](#can-my-model-use-functional-non-linearities-eg-nnfunctionalrelu-or-can-reused-modules-be-used-with-captum) * [Do JIT models, DataParallel models, or DistributedDataParallel models work with Captum?](#do-jit-models-dataparallel-models-or-distributeddataparallel-models-work-with-captum) * [I am working on a new interpretability or attribution method and would like to add it to Captum. How do I proceed?](#i-am-working-on-a-new-interpretability-or-attribution-method-and-would-like-to-add-it-to-captum-how-do-i-proceed) * [I am using a gradient-based attribution algorithm such as integrated gradients for a RNN or LSTM network and I see 'cudnn RNN backward can only be called in training mode'. 
How can I resolve this issue ?](#how-can-I-resolve-cudnn-RNN-backward-error-for-RNN-or-LSTM-network) @@ -53,7 +53,7 @@ For NLP models that take token indices as inputs, we cannot take gradients with If the output of the model is a token index, such as an image captioning cases, it is necessary to attribute with respect to the token score or probability rather than the index. Make sure that the model returns this and use target to choose the appropriate scalar score to attribute with respect to. -### **Can my model using functional non-linearities (E.g. nn.functional.ReLU) or reused modules be used with Captum?** +### **Can my model use functional non-linearities (E.g. nn.functional.ReLU) or can reused modules be used with Captum?** Most methods will work fine with functional non-linearities and arbitrary operations. Some methods, which require placing hooks during back-propagation, including DeepLift, DeepLiftShap, Guided Backpropagation, and Deconvolution will not work appropriately with functional non-linearities and must use the corresponding module activation (e.g. torch.nn.ReLU) which should be initialized in the module constructor. For DeepLift, it is important to also not reuse modules in the forward function, since this can cause issues in the propagation of multipliers. Computing layer or neuron attribution with layer modules that are used multiple times generally computes attributions for the last execution of the module. For more information regarding these restrictions, refer to the API documentation for the specific method, including DeepLift, DeepLiftShap, Guided Backpropagation, and Deconvolution. diff --git a/scripts/install_via_pip.sh b/scripts/install_via_pip.sh index 7a13dedb9e..de643e0687 100755 --- a/scripts/install_via_pip.sh +++ b/scripts/install_via_pip.sh @@ -37,7 +37,7 @@ export TERM=xterm # NOTE: All of the below installs use sudo, b/c otherwise pip will get # permission errors installing in the docker container. 
An alternative would be # to use a virtualenv, but that would lead to bifurcation of the CircleCI config -# since we'd need to source the environemnt in each step. +# since we'd need to source the environment in each step. # upgrade pip sudo pip install --upgrade pip @@ -55,7 +55,7 @@ fi if [[ $PYTORCH_NIGHTLY == true ]]; then sudo pip install --upgrade --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html else - # If no version specified, upgrade to latest release. + # If no version is specified, upgrade to the latest release. if [[ $CHOSEN_TORCH_VERSION == -1 ]]; then sudo pip install --upgrade torch else diff --git a/sphinx/source/approximation_methods.rst b/sphinx/source/approximation_methods.rst index b6b197d92e..4deec709bf 100644 --- a/sphinx/source/approximation_methods.rst +++ b/sphinx/source/approximation_methods.rst @@ -1,4 +1,4 @@ -Captum Approximation +Approximation ==================== .. automodule:: captum.attr._utils.approximation_methods diff --git a/sphinx/source/base_classes.rst b/sphinx/source/base_classes.rst index c337d666fc..a1f3d8117b 100644 --- a/sphinx/source/base_classes.rst +++ b/sphinx/source/base_classes.rst @@ -1,32 +1,32 @@ Base Classes -========== +======================== Attribution -^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.Attribution :members: Layer Attribution -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerAttribution :members: Neuron Attribution -^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronAttribution :members: Gradient Attribution -^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.GradientAttribution :members: Perturbation Attribution -^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^ .. 
autoclass:: captum.attr.PerturbationAttribution :members: diff --git a/sphinx/source/common.rst b/sphinx/source/common.rst deleted file mode 100644 index 711a7e6fe5..0000000000 --- a/sphinx/source/common.rst +++ /dev/null @@ -1,12 +0,0 @@ -Captum.Utils -============ - -.. automodule:: captum.attr._utils.common - -.. autofunction:: validate_input -.. autofunction:: validate_noise_tunnel_type -.. autofunction:: format_input -.. autofunction:: _format_attributions -.. autofunction:: zeros -.. autofunction:: _reshape_and_sum -.. autofunction:: _run_forward diff --git a/sphinx/source/concept.rst b/sphinx/source/concept.rst index 7aa60aabb9..19157398b7 100644 --- a/sphinx/source/concept.rst +++ b/sphinx/source/concept.rst @@ -1,29 +1,29 @@ Concept-based Interpretability -====== +============================== TCAV -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.concept.TCAV :members: ConceptInterpreter -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.concept.ConceptInterpreter :members: Concept -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.concept.Concept :members: Classifier -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
autoclass:: captum.concept.Classifier :members: diff --git a/sphinx/source/conf.py b/sphinx/source/conf.py index 27bdc763fd..b01d1c8b81 100644 --- a/sphinx/source/conf.py +++ b/sphinx/source/conf.py @@ -10,7 +10,9 @@ # -- Path setup -------------------------------------------------------------- import os +import re import sys +from typing import List base_path = os.path.abspath(os.path.join(__file__, "..", "..", "..")) # read module from src instead of installation @@ -75,6 +77,11 @@ # Inlcude init docstrings into body of autoclass directives autoclass_content = "both" +# Preserve signature defaults +# Prevents entire tensors from being printed, & gives callable functions +# proper names +autodoc_preserve_defaults = True + # Configuration for intersphinx: refer to the Python standard library and PyTorch intersphinx_mapping = { "python": ("https://docs.python.org/3", None), @@ -201,3 +208,46 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True + + +# -- Docstring Improvements -------------------------------------------------- + + +# Regex code for typing replacements. +# The "(?<![\.])" part checks to see if the string +# starts with a period, and "\b" denotes word boundaries. +# Only words that don't start with a period are replaced. +_rt = [r"(?<![\.])(\b", r"\b)"] + + +def autodoc_process_docstring( + app, what: str, name: str, obj, options, lines: List[str] +) -> None: + """ + Modify docstrings before creating html files. + Sphinx converts the 'Args:' and 'Returns:' sections of docstrings into + reStructuredText (rST) syntax, which can then be found via ':type' & ':rtype'. + + See here for more information: + https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html + """ + for i in range(len(lines)): + # Skip unless line is a parameter doc or a return doc + if not lines[i].startswith(":type"): + continue + if ":py:data:" in lines[i]: + continue + + # Ensure Any, Callable, & Iterator types are hyperlinked with intersphinx. + # The tilde '~' character hides the 'typing.' portion of the string.
+ lines[i] = re.sub(_rt[0] + r"Any" + _rt[1], "~typing.Any", lines[i]) + lines[i] = re.sub(_rt[0] + r"Callable" + _rt[1], "~typing.Callable", lines[i]) + lines[i] = re.sub(_rt[0] + r"Iterator" + _rt[1], "~typing.Iterator", lines[i]) + lines[i] = re.sub(_rt[0] + r"Iterable" + _rt[1], "~typing.Iterable", lines[i]) + + # Ensure Tensor type is hyperlinked by intersphinx + lines[i] = re.sub(_rt[0] + r"Tensor" + _rt[1], "~torch.Tensor", lines[i]) + + +def setup(app) -> None: + app.connect("autodoc-process-docstring", autodoc_process_docstring) diff --git a/sphinx/source/deconvolution.rst b/sphinx/source/deconvolution.rst index 61e092e768..d5813d3842 100644 --- a/sphinx/source/deconvolution.rst +++ b/sphinx/source/deconvolution.rst @@ -1,5 +1,5 @@ Deconvolution -========= +============= .. autoclass:: captum.attr.Deconvolution :members: diff --git a/sphinx/source/feature_ablation.rst b/sphinx/source/feature_ablation.rst index 35484a0fe6..e337aecf73 100644 --- a/sphinx/source/feature_ablation.rst +++ b/sphinx/source/feature_ablation.rst @@ -1,5 +1,6 @@ Feature Ablation -========= +================ .. autoclass:: captum.attr.FeatureAblation :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/feature_permutation.rst b/sphinx/source/feature_permutation.rst index d58f625aee..609ff1ff39 100644 --- a/sphinx/source/feature_permutation.rst +++ b/sphinx/source/feature_permutation.rst @@ -1,5 +1,6 @@ Feature Permutation -========= +=================== .. autoclass:: captum.attr.FeaturePermutation :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/gradient_shap.rst b/sphinx/source/gradient_shap.rst index 2a676dcb06..8d94c31463 100644 --- a/sphinx/source/gradient_shap.rst +++ b/sphinx/source/gradient_shap.rst @@ -3,6 +3,3 @@ GradientShap .. autoclass:: captum.attr.GradientShap :members: - -..
autoclass:: captum.attr.InputBaselineXGradient - :members: diff --git a/sphinx/source/guided_backprop.rst b/sphinx/source/guided_backprop.rst index 6ef3a947ae..4c0685e8c5 100644 --- a/sphinx/source/guided_backprop.rst +++ b/sphinx/source/guided_backprop.rst @@ -1,5 +1,5 @@ Guided Backprop -========= +=============== .. autoclass:: captum.attr.GuidedBackprop :members: diff --git a/sphinx/source/guided_grad_cam.rst b/sphinx/source/guided_grad_cam.rst index 99f18d2af1..207d8e55fa 100644 --- a/sphinx/source/guided_grad_cam.rst +++ b/sphinx/source/guided_grad_cam.rst @@ -1,5 +1,5 @@ Guided GradCAM -========= +============== .. autoclass:: captum.attr.GuidedGradCam :members: diff --git a/sphinx/source/influence.rst b/sphinx/source/influence.rst index 6366924a70..6b906d8c47 100644 --- a/sphinx/source/influence.rst +++ b/sphinx/source/influence.rst @@ -1,41 +1,41 @@ Influential Examples -====== +==================== DataInfluence -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.DataInfluence :members: SimilarityInfluence -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.SimilarityInfluence :members: TracInCPBase -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.TracInCPBase :members: TracInCP -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.TracInCP :members: TracInCPFast -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.TracInCPFast :members: TracInCPFastRandProj -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.influence.TracInCPFastRandProj :members: diff --git a/sphinx/source/input_x_gradient.rst b/sphinx/source/input_x_gradient.rst index cd5f222e27..5213eab69b 100644 --- a/sphinx/source/input_x_gradient.rst +++ b/sphinx/source/input_x_gradient.rst @@ -1,5 +1,5 @@ Input X Gradient -=============== +================ .. 
autoclass:: captum.attr.InputXGradient :members: diff --git a/sphinx/source/insights.rst b/sphinx/source/insights.rst index ece9180971..1e0963d483 100644 --- a/sphinx/source/insights.rst +++ b/sphinx/source/insights.rst @@ -4,12 +4,12 @@ Insights Batch ^^^^^ -.. autoclass:: captum.insights.api.Batch +.. autoclass:: captum.insights.Batch :members: AttributionVisualizer ^^^^^^^^^^^^^^^^^^^^^ -.. autoclass:: captum.insights.api.AttributionVisualizer +.. autoclass:: captum.insights.AttributionVisualizer :members: diff --git a/sphinx/source/kernel_shap.rst b/sphinx/source/kernel_shap.rst index 48cfde3535..421ed0ea62 100644 --- a/sphinx/source/kernel_shap.rst +++ b/sphinx/source/kernel_shap.rst @@ -3,3 +3,4 @@ KernelShap .. autoclass:: captum.attr.KernelShap :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/layer.rst b/sphinx/source/layer.rst index 7fbbd5bd85..466fbd97d2 100644 --- a/sphinx/source/layer.rst +++ b/sphinx/source/layer.rst @@ -1,70 +1,70 @@ Layer Attribution -====== +=========================== Layer Conductance -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerConductance :members: Layer Activation -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerActivation :members: Internal Influence -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.InternalInfluence :members: Layer Gradient X Activation -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerGradientXActivation :members: GradCAM -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerGradCam :members: Layer DeepLift -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerDeepLift :members: Layer DeepLiftShap -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerDeepLiftShap :members: Layer GradientShap -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
autoclass:: captum.attr.LayerGradientShap :members: Layer Integrated Gradients -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerIntegratedGradients :members: Layer Feature Ablation -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerFeatureAblation :members: Layer LRP -^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.LayerLRP :members: diff --git a/sphinx/source/lime.rst b/sphinx/source/lime.rst index 4c722304f1..483458572c 100644 --- a/sphinx/source/lime.rst +++ b/sphinx/source/lime.rst @@ -3,6 +3,7 @@ Lime .. autoclass:: captum.attr.LimeBase :members: + :exclude-members: compute_convergence_delta .. autoclass:: captum.attr.Lime :members: diff --git a/sphinx/source/metrics.rst b/sphinx/source/metrics.rst index 47c11e4856..8e71a40b02 100644 --- a/sphinx/source/metrics.rst +++ b/sphinx/source/metrics.rst @@ -1,15 +1,15 @@ Metrics -====== +=========== Infidelity -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^ .. autoclass:: captum.metrics.infidelity :members: Sensitivity -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^ .. autoclass:: captum.metrics.sensitivity_max :members: diff --git a/sphinx/source/neuron.rst b/sphinx/source/neuron.rst index 8ad1514378..897f237baf 100644 --- a/sphinx/source/neuron.rst +++ b/sphinx/source/neuron.rst @@ -1,56 +1,57 @@ Neuron Attribution -======= +=========================== Neuron Gradient -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronGradient :members: Neuron Integrated Gradients -^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronIntegratedGradients :members: Neuron Conductance -^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronConductance :members: Neuron DeepLift -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronDeepLift :members: Neuron DeepLiftShap -^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
autoclass:: captum.attr.NeuronDeepLiftShap :members: Neuron GradientShap -^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronGradientShap :members: Neuron Guided Backprop -^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronGuidedBackprop :members: Neuron Deconvolution -^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronDeconvolution :members: Neuron Feature Ablation -^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.attr.NeuronFeatureAblation :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/noise_tunnel.rst b/sphinx/source/noise_tunnel.rst index e1aff40b18..15b6ec7dbf 100644 --- a/sphinx/source/noise_tunnel.rst +++ b/sphinx/source/noise_tunnel.rst @@ -3,3 +3,4 @@ NoiseTunnel .. autoclass:: captum.attr.NoiseTunnel :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/occlusion.rst b/sphinx/source/occlusion.rst index a05b236e24..5867d739b9 100644 --- a/sphinx/source/occlusion.rst +++ b/sphinx/source/occlusion.rst @@ -3,3 +3,4 @@ Occlusion .. autoclass:: captum.attr.Occlusion :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/pytext.rst b/sphinx/source/pytext.rst index 66c847dcd9..f11a6a2099 100644 --- a/sphinx/source/pytext.rst +++ b/sphinx/source/pytext.rst @@ -1,11 +1,8 @@ Captum.Models ========================== -.. automodule:: captum.attr._models.pytext - -.. autoclass:: PyTextInterpretableEmbedding +.. autoclass:: captum.attr._models.pytext.PyTextInterpretableEmbedding :members: - -.. autoclass:: BaselineGenerator +.. 
autoclass:: captum.attr._models.pytext.BaselineGenerator :members: diff --git a/sphinx/source/robust.rst b/sphinx/source/robust.rst index 3b90a32ae5..48b360ad80 100644 --- a/sphinx/source/robust.rst +++ b/sphinx/source/robust.rst @@ -1,29 +1,29 @@ Robustness -====== +====================== FGSM -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.robust.FGSM :members: PGD -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.robust.PGD :members: Attack Comparator -^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.robust.AttackComparator :members: Min Param Perturbation -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ .. autoclass:: captum.robust.MinParamPerturbation :members: diff --git a/sphinx/source/shapley_value_sampling.rst b/sphinx/source/shapley_value_sampling.rst index c998125af9..4d40338540 100644 --- a/sphinx/source/shapley_value_sampling.rst +++ b/sphinx/source/shapley_value_sampling.rst @@ -1,7 +1,9 @@ Shapley Value Sampling -========= +====================== .. autoclass:: captum.attr.ShapleyValueSampling :members: + :exclude-members: compute_convergence_delta .. autoclass:: captum.attr.ShapleyValues :members: + :exclude-members: compute_convergence_delta diff --git a/sphinx/source/utilities.rst b/sphinx/source/utilities.rst index f4e3d7ace6..a19e75df9e 100644 --- a/sphinx/source/utilities.rst +++ b/sphinx/source/utilities.rst @@ -8,6 +8,8 @@ Visualization .. autofunction:: captum.attr.visualization.visualize_image_attr_multiple +.. autofunction:: captum.attr.visualization.visualize_timeseries_attr + Interpretable Embeddings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -16,6 +18,7 @@ Interpretable Embeddings :members: .. autofunction:: captum.attr.configure_interpretable_embedding_layer + .. 
autofunction:: captum.attr.remove_interpretable_embedding_layer diff --git a/tests/influence/_core/test_tracin_intermediate_quantities.py b/tests/influence/_core/test_tracin_intermediate_quantities.py index 7f3e806c28..9f0daebad3 100644 --- a/tests/influence/_core/test_tracin_intermediate_quantities.py +++ b/tests/influence/_core/test_tracin_intermediate_quantities.py @@ -179,7 +179,7 @@ def test_tracin_intermediate_quantities_consistent( else: # `test_features` is a tuple, so we unpack it to place in tuple, # along with `test_labels` - test_batch = (*test_features, test_labels) + test_batch = (*test_features, test_labels) # type: ignore[assignment] # the influence score is the dot product of intermediate quantities intermediate_quantities_scores = torch.matmul( diff --git a/website/sidebars.json b/website/sidebars.json index 0337e1bbe9..9efb1fddb2 100644 --- a/website/sidebars.json +++ b/website/sidebars.json @@ -1,7 +1,7 @@ { "docs": { "About": ["introduction"], - "General": ["getting_started", "captum_insights", "algorithms", "algorithms_comparison_matrix", "faq", "contribution_guidelines"], + "General": ["getting_started", "captum_insights", "attribution_algorithms", "algorithms_comparison_matrix", "faq", "contribution_guidelines"], "Usage": ["extension/integrated_gradients"] } } From 30a88745ea5055be63b8a5e4d63d1c0ff787773e Mon Sep 17 00:00:00 2001 From: Facebook Community Bot Date: Tue, 20 Sep 2022 17:53:17 -0700 Subject: [PATCH 126/174] Re-sync with internal repository (#1028) Co-authored-by: Facebook Community Bot <6422482+facebook-github-bot@users.noreply.github.com> --- website/pages/en/index.js | 39 ----------------------------------- website/static/css/custom.css | 22 -------------------- 2 files changed, 61 deletions(-) diff --git a/website/pages/en/index.js b/website/pages/en/index.js index d04e321ab7..9dcd0eb1b7 100755 --- a/website/pages/en/index.js +++ b/website/pages/en/index.js @@ -265,10 +265,8 @@ Convergence Delta: tensor([2.3842e-07, 
-4.7684e-07]) return (
-
-
@@ -277,41 +275,4 @@ Convergence Delta: tensor([2.3842e-07, -4.7684e-07]) } } -function SocialBanner() { - return ( -
-
- Support Ukraine 🇺🇦{' '} - - Help Provide Humanitarian Aid to Ukraine - - . -
-
- ); -} - -function VideoContainer() { - return ( -
-
-
-

Check it out in the intro video

-
-