Enable analyzing nested input- and output-dicts #212

Merged · 16 commits · Feb 5, 2023
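For context, this PR lets torchinfo's summary() traverse nested container outputs (dicts, tuples, lists, and objects carrying tensors) instead of failing on them. A minimal sketch of the new behavior, assuming a torchinfo build that includes this change; NestedDictModel is an illustrative stand-in mirroring the HighlyNestedDictModel fixture added below, not part of the PR itself:

import torch
from torch import nn
from torchinfo import summary


class NestedDictModel(nn.Module):
    """Hypothetical example: output tensor buried one dict, one tuple, one list deep."""

    def __init__(self) -> None:
        super().__init__()
        self.lin = nn.Linear(10, 10)

    def forward(self, x: torch.Tensor) -> dict:
        return {"foo": ({"bar": [self.lin(x)]},)}


# The nested output above is the kind of structure this PR enables
# summary() to analyze.
summary(NestedDictModel(), input_data=torch.ones(10))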
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
@@ -56,6 +56,8 @@ jobs:
python -m pip install --upgrade pip
python -m pip install mypy pytest pytest-cov
pip install torch==${{ matrix.pytorch-version }} torchvision
pip install transformers
pip install compressai
- name: mypy
if: ${{ matrix.pytorch-version == '1.13' }}
run: |
2 changes: 2 additions & 0 deletions requirements-dev.txt
@@ -8,3 +8,5 @@ pylint
pytest
pytest-cov
pre-commit
transformers
compressai
60 changes: 59 additions & 1 deletion tests/fixtures/models.py
@@ -3,7 +3,7 @@

import math
from collections import namedtuple
from typing import Any, cast
from typing import Any, Sequence, cast

import torch
from torch import nn
@@ -323,6 +323,64 @@ def forward(
        return x


class ObjectWithTensors:
    """A class with a ``tensors`` attribute."""

    def __init__(self, tensors: torch.Tensor | Sequence[Any]) -> None:
        self.tensors = tensors


class HighlyNestedDictModel(nn.Module):
    """Model that returns a highly nested dict."""

    def __init__(self) -> None:
        super().__init__()
        self.lin1 = nn.Linear(10, 10)
        self.lin2 = nn.Linear(10, 10)

    def forward(
        self, x: torch.Tensor
    ) -> dict[str, tuple[dict[str, list[ObjectWithTensors]]]]:
        x = self.lin1(x)
        x = self.lin2(x)
        x = F.softmax(x, dim=0)
        return {"foo": ({"bar": [ObjectWithTensors(x)]},)}


class IntWithGetitem(int):
    """An int subclass with a __getitem__ method."""

    def __init__(self, tensor: torch.Tensor) -> None:
        super().__init__()
        self.tensor = tensor

    def __int__(self) -> IntWithGetitem:
        return self

    def __getitem__(self, val: int) -> torch.Tensor:
        # Indexable, yet not a list, tuple, or dict -- the edge case
        # extract_tensor has to handle.
        return self.tensor * val


class EdgecaseInputOutputModel(nn.Module):
    """
    For testing LayerInfo.calculate_size.extract_tensor, covering two cases:
    inputs that have ``__getitem__`` but are not a list, tuple, or dict
    (see IntWithGetitem above), and inputs that are empty/falsy.
    """

    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(
        self, input_list: dict[str, torch.Tensor]
    ) -> dict[str, IntWithGetitem]:
        x = input_list["foo"] if input_list else torch.ones(3)
        x = self.linear(x)
        return {"foo": IntWithGetitem(x)}


class NamedTuple(nn.Module):
    """Model that takes in a NamedTuple as input."""

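The fixtures above are built to hit specific branches of LayerInfo.calculate_size.extract_tensor. As an illustration only, and not torchinfo's actual implementation, a simplified recursive extractor covering those cases could look like this:

from __future__ import annotations

from typing import Any

import torch


def extract_tensor_sketch(inputs: Any) -> torch.Tensor | None:
    """Illustrative sketch only; the real logic lives in torchinfo's LayerInfo."""
    if isinstance(inputs, torch.Tensor):
        return inputs
    if hasattr(inputs, "tensors"):
        # e.g. ObjectWithTensors in the fixtures above.
        return extract_tensor_sketch(inputs.tensors)
    if isinstance(inputs, dict):
        return extract_tensor_sketch(list(inputs.values()))
    if isinstance(inputs, (list, tuple)) and inputs:
        return extract_tensor_sketch(inputs[0])
    # Falsy/empty inputs, or objects like IntWithGetitem that are
    # indexable but not list/tuple/dict: nothing to extract.
    return None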
38 changes: 38 additions & 0 deletions tests/test_output/bert.out
@@ -0,0 +1,38 @@
====================================================================================================
Layer (type:depth-idx) Output Shape Param #
====================================================================================================
BertModel [2, 768] --
├─BertEmbeddings: 1-1 [2, 512, 768] --
│ └─Embedding: 2-1 [2, 512, 768] 23,440,896
│ └─Embedding: 2-2 [2, 512, 768] 1,536
│ └─Embedding: 2-3 [1, 512, 768] 393,216
│ └─LayerNorm: 2-4 [2, 512, 768] 1,536
│ └─Dropout: 2-5 [2, 512, 768] --
├─BertEncoder: 1-2 [2, 512, 768] --
│ └─ModuleList: 2-6 -- --
│ │ └─BertLayer: 3-1 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-2 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-3 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-4 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-5 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-6 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-7 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-8 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-9 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-10 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-11 [2, 512, 768] 7,087,872
│ │ └─BertLayer: 3-12 [2, 512, 768] 7,087,872
├─BertPooler: 1-3 [2, 768] --
│ └─Linear: 2-7 [2, 768] 590,592
│ └─Tanh: 2-8 [2, 768] --
====================================================================================================
Total params: 109,482,240
Trainable params: 109,482,240
Non-trainable params: 0
Total mult-adds (M): 218.57
====================================================================================================
Input size (MB): 0.01
Forward/backward pass size (MB): 852.50
Params size (MB): 437.93
Estimated Total Size (MB): 1290.45
====================================================================================================
45 changes: 45 additions & 0 deletions tests/test_output/compressai.out
@@ -0,0 +1,45 @@
===============================================================================================
Layer (type:depth-idx) Output Shape Param #
===============================================================================================
FactorizedPrior [1, 192, 16, 16] --
├─Sequential: 1-1 [1, 192, 16, 16] --
│ └─Conv2d: 2-1 [1, 128, 128, 128] 9,728
│ └─GDN: 2-2 [1, 128, 128, 128] 16,512
│ │ └─NonNegativeParametrizer: 3-1 [128] --
│ │ └─NonNegativeParametrizer: 3-2 [128, 128] --
│ └─Conv2d: 2-3 [1, 128, 64, 64] 409,728
│ └─GDN: 2-4 [1, 128, 64, 64] 16,512
│ │ └─NonNegativeParametrizer: 3-3 [128] --
│ │ └─NonNegativeParametrizer: 3-4 [128, 128] --
│ └─Conv2d: 2-5 [1, 128, 32, 32] 409,728
│ └─GDN: 2-6 [1, 128, 32, 32] 16,512
│ │ └─NonNegativeParametrizer: 3-5 [128] --
│ │ └─NonNegativeParametrizer: 3-6 [128, 128] --
│ └─Conv2d: 2-7 [1, 192, 16, 16] 614,592
├─EntropyBottleneck: 1-2 [1, 192, 16, 16] 11,712
│ └─LowerBound: 2-8 [192, 1, 256] --
├─Sequential: 1-3 [1, 3, 256, 256] --
│ └─ConvTranspose2d: 2-9 [1, 128, 32, 32] 614,528
│ └─GDN: 2-10 [1, 128, 32, 32] 16,512
│ │ └─NonNegativeParametrizer: 3-7 [128] --
│ │ └─NonNegativeParametrizer: 3-8 [128, 128] --
│ └─ConvTranspose2d: 2-11 [1, 128, 64, 64] 409,728
│ └─GDN: 2-12 [1, 128, 64, 64] 16,512
│ │ └─NonNegativeParametrizer: 3-9 [128] --
│ │ └─NonNegativeParametrizer: 3-10 [128, 128] --
│ └─ConvTranspose2d: 2-13 [1, 128, 128, 128] 409,728
│ └─GDN: 2-14 [1, 128, 128, 128] 16,512
│ │ └─NonNegativeParametrizer: 3-11 [128] --
│ │ └─NonNegativeParametrizer: 3-12 [128, 128] --
│ └─ConvTranspose2d: 2-15 [1, 3, 256, 256] 9,603
===============================================================================================
Total params: 2,998,147
Trainable params: 2,998,147
Non-trainable params: 0
Total mult-adds (G): 12.06
===============================================================================================
Input size (MB): 0.79
Forward/backward pass size (MB): 46.01
Params size (MB): 11.55
Estimated Total Size (MB): 58.34
===============================================================================================
16 changes: 16 additions & 0 deletions tests/test_output/edgecase_input_output_model.out
@@ -0,0 +1,16 @@
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
EdgecaseInputOutputModel -- --
├─Linear: 1-1 [1] 4
==========================================================================================
Total params: 4
Trainable params: 4
Non-trainable params: 0
Total mult-adds (M): 0.00
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
==========================================================================================
46 changes: 46 additions & 0 deletions tests/test_output/flan_t5_small.out
@@ -0,0 +1,46 @@
==============================================================================================================
Layer (type:depth-idx) Output Shape Param #
==============================================================================================================
T5ForConditionalGeneration [2, 100, 512] --
├─T5Stack: 1-1 [2, 100, 512] 35,332,800
├─T5Stack: 1-2 -- (recursive)
│ └─Embedding: 2-1 [2, 100, 512] 16,449,536
├─T5Stack: 1-3 -- (recursive)
│ └─Dropout: 2-2 [2, 100, 512] --
│ └─ModuleList: 2-3 -- --
│ │ └─T5Block: 3-1 [2, 100, 512] 2,360,512
│ │ └─T5Block: 3-2 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-3 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-4 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-5 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-6 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-7 [2, 100, 512] 2,360,320
│ │ └─T5Block: 3-8 [2, 100, 512] 2,360,320
│ └─T5LayerNorm: 2-4 [2, 100, 512] 512
│ └─Dropout: 2-5 [2, 100, 512] --
├─T5Stack: 1-4 [2, 6, 100, 64] 16,449,536
│ └─Embedding: 2-6 [2, 100, 512] (recursive)
│ └─Dropout: 2-7 [2, 100, 512] --
│ └─ModuleList: 2-8 -- --
│ │ └─T5Block: 3-9 [2, 100, 512] 3,147,456
│ │ └─T5Block: 3-10 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-11 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-12 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-13 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-14 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-15 [2, 100, 512] 3,147,264
│ │ └─T5Block: 3-16 [2, 100, 512] 3,147,264
│ └─T5LayerNorm: 2-9 [2, 100, 512] 512
│ └─Dropout: 2-10 [2, 100, 512] --
├─Linear: 1-5 [2, 100, 32128] 16,449,536
==============================================================================================================
Total params: 128,743,488
Trainable params: 128,743,488
Non-trainable params: 0
Total mult-adds (M): 186.86
==============================================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 217.84
Params size (MB): 307.84
Estimated Total Size (MB): 525.69
==============================================================================================================
17 changes: 17 additions & 0 deletions tests/test_output/highly_nested_dict_model.out
@@ -0,0 +1,17 @@
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
HighlyNestedDictModel [10] --
├─Linear: 1-1 [10] 110
├─Linear: 1-2 [10] 110
==========================================================================================
Total params: 220
Trainable params: 220
Non-trainable params: 0
Total mult-adds (M): 0.00
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
==========================================================================================
22 changes: 22 additions & 0 deletions tests/torchinfo_test.py
@@ -13,8 +13,10 @@
    ConvLayerB,
    CustomParameter,
    DictParameter,
    EdgecaseInputOutputModel,
    EmptyModule,
    FakePrunedLayerModel,
    HighlyNestedDictModel,
    InsideModel,
    LinearModel,
    LSTMNet,
@@ -344,6 +346,26 @@ def test_module_dict() -> None:
    )


def test_highly_nested_dict_model() -> None:
    """
    Test if-clauses 1, 2, 4, and 5 (counting from 1)
    of LayerInfo.calculate_size.extract_tensor.
    """
    model = HighlyNestedDictModel()
    summary(model, input_data=torch.ones(10))


def test_edgecase_input_output_model() -> None:
    """
    Test if-clause 3 (counting from 1) of
    LayerInfo.calculate_size.extract_tensor, as well as the final return.
    """
    model = EdgecaseInputOutputModel()
    summary(model, input_data=[{}])


def test_model_with_args() -> None:
    summary(RecursiveNet(), input_size=(1, 64, 28, 28), args1="args1", args2="args2")

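For context on the empty-dict test above: with input_data=[{}] there is no input tensor to measure, which is why edgecase_input_output_model.out reports 0.00 MB throughout and the model falls back to torch.ones(3) internally. A minimal sketch, assuming summary() returns a ModelStatistics object with a total_params attribute (as in current torchinfo):

import torch
from torchinfo import summary

from tests.fixtures.models import EdgecaseInputOutputModel

# No input tensor to measure -> "Input size (MB): 0.00" in the .out file.
stats = summary(EdgecaseInputOutputModel(), input_data=[{}])
assert stats.total_params == 4  # nn.Linear(3, 1): 3 weights + 1 bias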
44 changes: 44 additions & 0 deletions tests/torchinfo_xl_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import pytest
import torch
import torchvision # type: ignore[import]
from compressai.zoo import image_models # type: ignore[import]
from packaging import version
from transformers import (  # type: ignore[import]
    AutoModelForSeq2SeqLM,
    BertConfig,
    BertModel,
)

from tests.fixtures.genotype import GenotypeNetwork # type: ignore[attr-defined]
from tests.fixtures.tmva_net import TMVANet # type: ignore[attr-defined]
@@ -143,3 +150,40 @@ def test_google() -> None:
    # Check googlenet in training mode since InceptionAux layers are used in
    # forward-prop in train mode but not in eval mode.
    summary(google_net, (1, 3, 112, 112), depth=7, mode="train")


@pytest.mark.skipif(
    version.parse(torch.__version__) < version.parse("1.8"),
    reason="FlanT5Small only works for PyTorch v1.8 and above",
)
def test_flan_t5_small() -> None:
    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
    inputs = {
        "input_ids": torch.zeros(2, 100).long(),
        "attention_mask": torch.zeros(2, 100).long(),
        "labels": torch.zeros(2, 100).long(),
    }
    summary(model, input_data=inputs)


@pytest.mark.skipif(
    version.parse(torch.__version__) < version.parse("1.8"),
    reason="BertModel only works for PyTorch v1.8 and above",
)
def test_bert() -> None:
    model = BertModel(BertConfig())
    summary(
        model,
        input_size=[(2, 512), (2, 512), (2, 512)],
        dtypes=[torch.int, torch.int, torch.int],
        device="cpu",
    )


@pytest.mark.skipif(
    version.parse(torch.__version__) < version.parse("1.8"),
    reason="compressai only works for PyTorch v1.8 and above",
)
def test_compressai() -> None:
    model = image_models["bmshj2018-factorized"](quality=4, pretrained=True)
    summary(model, (1, 3, 256, 256))