Commit

Merge branch 'main' into dataset_docs

NicolasHug committed Feb 3, 2022
2 parents 89d3adb + 3f33eeb commit fed9ac0
Showing 43 changed files with 806 additions and 439 deletions.
2 changes: 1 addition & 1 deletion .circleci/unittest/linux/scripts/run_test.sh
@@ -7,4 +7,4 @@ conda activate ./env

export PYTORCH_TEST_WITH_SLOW='1'
python -m torch.utils.collect_env
pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20
pytest --junitxml=test-results/junit.xml -v --durations 20
2 changes: 1 addition & 1 deletion .circleci/unittest/windows/scripts/run_test.sh
@@ -10,4 +10,4 @@ source "$this_dir/set_cuda_envs.sh"

export PYTORCH_TEST_WITH_SLOW='1'
python -m torch.utils.collect_env
pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20
pytest --junitxml=test-results/junit.xml -v --durations 20
7 changes: 0 additions & 7 deletions .coveragerc

This file was deleted.

25 changes: 24 additions & 1 deletion docs/source/models.rst
@@ -89,6 +89,10 @@ You can construct a model with random weights by calling its constructor:
vit_b_32 = models.vit_b_32()
vit_l_16 = models.vit_l_16()
vit_l_32 = models.vit_l_32()
convnext_tiny = models.convnext_tiny()
convnext_small = models.convnext_small()
convnext_base = models.convnext_base()
convnext_large = models.convnext_large()
We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
These can be constructed by passing ``pretrained=True``:
@@ -136,6 +140,10 @@ These can be constructed by passing ``pretrained=True``:
vit_b_32 = models.vit_b_32(pretrained=True)
vit_l_16 = models.vit_l_16(pretrained=True)
vit_l_32 = models.vit_l_32(pretrained=True)
convnext_tiny = models.convnext_tiny(pretrained=True)
convnext_small = models.convnext_small(pretrained=True)
convnext_base = models.convnext_base(pretrained=True)
convnext_large = models.convnext_large(pretrained=True)
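
A minimal usage sketch for one of these pre-trained builders (the 224×224 input size, `eval()` call, and dummy batch are assumptions about typical ImageNet inference, not anything specified by this commit):

```python
import torch
from torchvision import models

# Construct the pre-trained model; weights are downloaded to the cache directory on first use.
model = models.convnext_tiny(pretrained=True)
model.eval()  # inference mode: disables dropout, uses running norm statistics

# Dummy batch of one 3-channel 224x224 image (typical ImageNet resolution, assumed here).
x = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # expected: torch.Size([1, 1000]) for the ImageNet classification head
```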
Instantiating a pre-trained model will download its weights to a cache directory.
This directory can be set using the `TORCH_HOME` environment variable. See
@@ -248,7 +256,10 @@ vit_b_16 81.072 95.318
vit_b_32 75.912 92.466
vit_l_16 79.662 94.638
vit_l_32 76.972 93.070
convnext_tiny (prototype) 82.520 96.146
convnext_tiny 82.520 96.146
convnext_small 83.616 96.650
convnext_base 84.062 96.870
convnext_large 84.414 96.976
================================ ============= =============


@@ -464,6 +475,18 @@ VisionTransformer
vit_l_16
vit_l_32

ConvNeXt
--------

.. autosummary::
:toctree: generated/
:template: function.rst

convnext_tiny
convnext_small
convnext_base
convnext_large

Quantized Models
----------------

1 change: 1 addition & 0 deletions docs/source/ops.rst
@@ -21,6 +21,7 @@ Operators
clip_boxes_to_image
deform_conv2d
generalized_box_iou
generalized_box_iou_loss
masks_to_boxes
nms
ps_roi_align
1 change: 1 addition & 0 deletions hubconf.py
@@ -2,6 +2,7 @@
dependencies = ["torch"]

from torchvision.models.alexnet import alexnet
from torchvision.models.convnext import convnext_tiny, convnext_small, convnext_base, convnext_large
from torchvision.models.densenet import densenet121, densenet169, densenet201, densenet161
from torchvision.models.efficientnet import (
efficientnet_b0,
5 changes: 3 additions & 2 deletions references/classification/README.md
@@ -201,11 +201,12 @@ and `--batch_size 64`.
### ConvNeXt
```
torchrun --nproc_per_node=8 train.py\
--model convnext_tiny --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
--model $MODEL --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
--train-crop-size 176 --model-ema --val-resize-size 236 --ra-sampler --ra-reps 4
--train-crop-size 176 --model-ema --val-resize-size 232 --ra-sampler --ra-reps 4
```
Here `$MODEL` is one of `convnext_tiny`, `convnext_small`, `convnext_base` and `convnext_large`. Note that each variant had its `--val-resize-size` optimized in a post-training step; see its `Weights` entry for the exact value.
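
As an illustrative sketch of what `--val-resize-size` controls at evaluation time (the 224 center-crop size and the normalization constants are assumptions about the standard ImageNet pipeline, not values taken from this diff):

```python
from torchvision import transforms

# Resize to the tuned validation resize size, then center-crop to the eval crop size.
# 232 matches the command above; each ConvNeXt variant may use a slightly different value.
val_resize_size = 232
eval_preprocess = transforms.Compose([
    transforms.Resize(val_resize_size),
    transforms.CenterCrop(224),  # assumed eval crop size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # standard ImageNet statistics (assumed)
                         std=[0.229, 0.224, 0.225]),
])
```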

Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
2 changes: 1 addition & 1 deletion references/classification/train.py
@@ -178,7 +178,7 @@ def load_data(traindir, valdir, args):

print("Creating data loaders")
if args.distributed:
if args.ra_sampler:
if hasattr(args, "ra_sampler") and args.ra_sampler:
train_sampler = RASampler(dataset, shuffle=True, repetitions=args.ra_reps)
else:
train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
22 changes: 15 additions & 7 deletions references/classification/train_quantization.py
@@ -13,14 +13,16 @@


try:
from torchvision.prototype import models as PM
from torchvision import prototype
except ImportError:
PM = None
prototype = None


def main(args):
if args.weights and PM is None:
if args.prototype and prototype is None:
raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
if not args.prototype and args.weights:
raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
if args.output_dir:
utils.mkdir(args.output_dir)

@@ -54,14 +56,14 @@ def main(args):

print("Creating model", args.model)
# when training quantized models, we always start from a pre-trained fp32 reference model
if not args.weights:
if not args.prototype:
model = torchvision.models.quantization.__dict__[args.model](pretrained=True, quantize=args.test_only)
else:
model = PM.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
model = prototype.models.quantization.__dict__[args.model](weights=args.weights, quantize=args.test_only)
model.to(device)

if not (args.test_only or args.post_training_quantize):
model.fuse_model()
model.fuse_model(is_qat=True)
model.qconfig = torch.ao.quantization.get_default_qat_qconfig(args.backend)
torch.ao.quantization.prepare_qat(model, inplace=True)

@@ -95,7 +97,7 @@ def main(args):
ds, batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True
)
model.eval()
model.fuse_model()
model.fuse_model(is_qat=False)
model.qconfig = torch.ao.quantization.get_default_qconfig(args.backend)
torch.ao.quantization.prepare(model, inplace=True)
# Calibrate first
@@ -264,6 +266,12 @@ def get_args_parser(add_help=True):
parser.add_argument("--clip-grad-norm", default=None, type=float, help="the maximum gradient norm (default None)")

# Prototype models only
parser.add_argument(
"--prototype",
dest="prototype",
help="Use prototype model builders instead of those from the main area",
action="store_true",
)
parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")

return parser
2 changes: 1 addition & 1 deletion references/classification/utils.py
@@ -344,7 +344,7 @@ def store_model_weights(model, checkpoint_path, checkpoint_key="model", strict=T
# Quantized Classification
model = M.quantization.mobilenet_v3_large(pretrained=False, quantize=False)
model.fuse_model()
model.fuse_model(is_qat=True)
model.qconfig = torch.ao.quantization.get_default_qat_qconfig('qnnpack')
_ = torch.ao.quantization.prepare_qat(model, inplace=True)
print(store_model_weights(model, './qat.pth'))
Binary file added test/expect/ModelTester.test_vitc_b_16_expect.pkl
Binary file not shown.
32 changes: 31 additions & 1 deletion test/test_models.py
@@ -8,6 +8,7 @@
import warnings
from collections import OrderedDict
from tempfile import TemporaryDirectory
from typing import Any

import pytest
import torch
@@ -514,6 +515,35 @@ def test_generalizedrcnn_transform_repr():
assert t.__repr__() == expected_string


test_vit_conv_stem_configs = [
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=64),
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=128),
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=128),
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=256),
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=256),
models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=512),
]


def vitc_b_16(**kwargs: Any):
return models.VisionTransformer(
image_size=224,
patch_size=16,
num_layers=12,
num_heads=12,
hidden_dim=768,
mlp_dim=3072,
conv_stem_configs=test_vit_conv_stem_configs,
**kwargs,
)


@pytest.mark.parametrize("model_fn", [vitc_b_16])
@pytest.mark.parametrize("dev", cpu_and_gpu())
def test_vitc_models(model_fn, dev):
test_classification_model(model_fn, dev)


@pytest.mark.parametrize("model_fn", get_models_from_module(models))
@pytest.mark.parametrize("dev", cpu_and_gpu())
def test_classification_model(model_fn, dev):
@@ -803,7 +833,7 @@ def test_quantized_classification_model(model_fn):
model.train()
model.qconfig = torch.ao.quantization.default_qat_qconfig

model.fuse_model()
model.fuse_model(is_qat=not eval_mode)
if eval_mode:
torch.ao.quantization.prepare(model, inplace=True)
else:
47 changes: 34 additions & 13 deletions test/test_utils.py
@@ -124,7 +124,7 @@ def test_draw_boxes_vanilla():
img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
img_cp = img.clone()
boxes_cp = boxes.clone()
result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7)
result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors="white")

path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_vanilla.png")
if not os.path.exists(path):
@@ -149,7 +149,11 @@ def test_draw_invalid_boxes():
img_tp = ((1, 1, 1), (1, 2, 3))
img_wrong1 = torch.full((3, 5, 5), 255, dtype=torch.float)
img_wrong2 = torch.full((1, 3, 5, 5), 255, dtype=torch.uint8)
img_correct = torch.zeros((3, 10, 10), dtype=torch.uint8)
boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
labels_wrong = ["one", "two"]
colors_wrong = ["pink", "blue"]

with pytest.raises(TypeError, match="Tensor expected"):
utils.draw_bounding_boxes(img_tp, boxes)
with pytest.raises(ValueError, match="Tensor uint8 expected"):
@@ -158,6 +162,10 @@
utils.draw_bounding_boxes(img_wrong2, boxes)
with pytest.raises(ValueError, match="Only grayscale and RGB images are supported"):
utils.draw_bounding_boxes(img_wrong2[0][:2], boxes)
with pytest.raises(ValueError, match="Number of boxes"):
utils.draw_bounding_boxes(img_correct, boxes, labels_wrong)
with pytest.raises(ValueError, match="Number of colors"):
utils.draw_bounding_boxes(img_correct, boxes, colors=colors_wrong)


@pytest.mark.parametrize(
@@ -317,29 +325,42 @@ def test_draw_keypoints_errors():
utils.draw_keypoints(image=img, keypoints=invalid_keypoints)


def test_flow_to_image():
@pytest.mark.parametrize("batch", (True, False))
def test_flow_to_image(batch):
h, w = 100, 100
flow = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
flow = torch.stack(flow[::-1], dim=0).float()
flow[0] -= h / 2
flow[1] -= w / 2

if batch:
flow = torch.stack([flow, flow])

img = utils.flow_to_image(flow)
assert img.shape == ((2, 3, h, w) if batch else (3, h, w))

path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "expected_flow.pt")
expected_img = torch.load(path, map_location="cpu")
assert_equal(expected_img, img)

if batch:
expected_img = torch.stack([expected_img, expected_img])

assert_equal(expected_img, img)

def test_flow_to_image_errors():
wrong_flow1 = torch.full((3, 10, 10), 0, dtype=torch.float)
wrong_flow2 = torch.full((2, 10), 0, dtype=torch.float)
wrong_flow3 = torch.full((2, 10, 30), 0, dtype=torch.int)

with pytest.raises(ValueError, match="Input flow should have shape"):
utils.flow_to_image(flow=wrong_flow1)
with pytest.raises(ValueError, match="Input flow should have shape"):
utils.flow_to_image(flow=wrong_flow2)
with pytest.raises(ValueError, match="Flow should be of dtype torch.float"):
utils.flow_to_image(flow=wrong_flow3)
@pytest.mark.parametrize(
"input_flow, match",
(
(torch.full((3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"),
(torch.full((5, 3, 10, 10), 0, dtype=torch.float), "Input flow should have shape"),
(torch.full((2, 10), 0, dtype=torch.float), "Input flow should have shape"),
(torch.full((5, 2, 10), 0, dtype=torch.float), "Input flow should have shape"),
(torch.full((2, 10, 30), 0, dtype=torch.int), "Flow should be of dtype torch.float"),
),
)
def test_flow_to_image_errors(input_flow, match):
with pytest.raises(ValueError, match=match):
utils.flow_to_image(flow=input_flow)


if __name__ == "__main__":