
Commit bf41dfb

Adding pre-trained weights for EfficientNetV2-s

1 parent 8f6df0c

5 files changed: +41 -11 lines

docs/source/models.rst

Lines changed: 11 additions & 1 deletion
@@ -38,7 +38,7 @@ architectures for image classification:
 - `ResNeXt`_
 - `Wide ResNet`_
 - `MNASNet`_
-- `EfficientNet`_
+- `EfficientNet`_ v1 & v2
 - `RegNet`_
 - `VisionTransformer`_
 - `ConvNeXt`_
@@ -70,6 +70,9 @@ You can construct a model with random weights by calling its constructor:
     efficientnet_b5 = models.efficientnet_b5()
     efficientnet_b6 = models.efficientnet_b6()
     efficientnet_b7 = models.efficientnet_b7()
+    efficientnet_v2_s = models.efficientnet_v2_s()
+    efficientnet_v2_m = models.efficientnet_v2_m()
+    efficientnet_v2_l = models.efficientnet_v2_l()
     regnet_y_400mf = models.regnet_y_400mf()
     regnet_y_800mf = models.regnet_y_800mf()
     regnet_y_1_6gf = models.regnet_y_1_6gf()
@@ -122,6 +125,9 @@ These can be constructed by passing ``pretrained=True``:
     efficientnet_b5 = models.efficientnet_b5(pretrained=True)
     efficientnet_b6 = models.efficientnet_b6(pretrained=True)
     efficientnet_b7 = models.efficientnet_b7(pretrained=True)
+    efficientnet_v2_s = models.efficientnet_v2_s(pretrained=True)
+    efficientnet_v2_m = models.efficientnet_v2_m(pretrained=True)
+    efficientnet_v2_l = models.efficientnet_v2_l(pretrained=True)
     regnet_y_400mf = models.regnet_y_400mf(pretrained=True)
     regnet_y_800mf = models.regnet_y_800mf(pretrained=True)
     regnet_y_1_6gf = models.regnet_y_1_6gf(pretrained=True)
@@ -238,6 +244,7 @@ EfficientNet-B4 83.384 96.594
 EfficientNet-B5     83.444  96.628
 EfficientNet-B6     84.008  96.916
 EfficientNet-B7     84.122  96.908
+EfficientNetV2-s    84.228  96.878
 regnet_x_400mf      72.834  90.950
 regnet_x_800mf      75.212  92.348
 regnet_x_1_6gf      77.040  93.440
@@ -439,6 +446,9 @@ EfficientNet
     efficientnet_b5
     efficientnet_b6
     efficientnet_b7
+    efficientnet_v2_s
+    efficientnet_v2_m
+    efficientnet_v2_l
 
 RegNet
 ------------
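For reference, the documented constructor works like any other classification model in `torchvision.models`; the snippet below is a minimal sketch (not part of the diff) assuming this commit's build and a dummy input at the 384x384 evaluation size used for V2-s:

```python
import torch
from torchvision import models

# Build EfficientNetV2-s with the newly published ImageNet-1K weights.
model = models.efficientnet_v2_s(pretrained=True)
model.eval()

# Dummy batch of one 3x384x384 image; real inputs should be normalized
# with the usual ImageNet mean/std before inference.
x = torch.randn(1, 3, 384, 384)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # torch.Size([1, 1000])
```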

hubconf.py

Lines changed: 3 additions & 0 deletions
@@ -13,6 +13,9 @@
     efficientnet_b5,
     efficientnet_b6,
     efficientnet_b7,
+    efficientnet_v2_s,
+    efficientnet_v2_m,
+    efficientnet_v2_l,
 )
 from torchvision.models.googlenet import googlenet
 from torchvision.models.inception import inception_v3
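Because the three V2 constructors are now re-exported in `hubconf.py`, they should also be reachable through `torch.hub`; a hedged sketch, where the repository ref defaulting to the main branch and the `pretrained` keyword forwarding are assumptions based on the existing hub entrypoints:

```python
import torch

# Pull EfficientNetV2-s via torch.hub, which resolves the name through hubconf.py.
model = torch.hub.load("pytorch/vision", "efficientnet_v2_s", pretrained=True)
model.eval()
```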

references/classification/README.md

Lines changed: 19 additions & 1 deletion
@@ -88,7 +88,7 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
 
 
-### EfficientNet
+### EfficientNet-V1
 
 The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
 
@@ -114,6 +114,24 @@ torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bic
 --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained
 ```
 
+
+### EfficientNet-V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \
+--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
+--ra-sampler --ra-reps 4
+```
+Here `$MODEL` is one of `efficientnet_v2_s`, `efficientnet_v2_m` and `efficientnet_v2_l`.
+Note that the Small variant used a `$TRAIN_SIZE` of `300` and an `$EVAL_SIZE` of `384`, while the other variants used `384` and `480` respectively.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch-size 16`.
+
+
 ### RegNet
 
 #### Small models
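The new weights can presumably be validated the same way as the V1 models, by running the reference script in test-only mode. The command below is a sketch modelled on the EfficientNet-V1 evaluation command above, with the V2-s evaluation size and bilinear interpolation substituted in; it is not taken from the diff:

```
torchrun --nproc_per_node=8 train.py --model efficientnet_v2_s --interpolation bilinear \
    --val-resize-size 384 --val-crop-size 384 --test-only --pretrained
```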

torchvision/models/efficientnet.py

Lines changed: 2 additions & 2 deletions
@@ -42,8 +42,8 @@
     "efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
     "efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
     "efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
-    # Temporary TF weights
-    "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
+    # Weights trained with TorchVision
+    "efficientnet_v2_s": "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
 }
 
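The updated `model_urls` entry is what `pretrained=True` resolves. A roughly equivalent manual download is sketched below, assuming the checkpoint's state dict matches the constructor's layout (which `pretrained=True` relies on as well):

```python
from torch.hub import load_state_dict_from_url
from torchvision.models import efficientnet_v2_s

# Download the checkpoint referenced by model_urls["efficientnet_v2_s"] and load
# it into a freshly constructed model.
url = "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth"
state_dict = load_state_dict_from_url(url, progress=True)

model = efficientnet_v2_s()
model.load_state_dict(state_dict)
model.eval()
```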
torchvision/prototype/models/efficientnet.py

Lines changed: 6 additions & 7 deletions
@@ -60,7 +60,6 @@ def _efficientnet(
 _COMMON_META = {
     "task": "image_classification",
     "categories": _IMAGENET_CATEGORIES,
-    "interpolation": InterpolationMode.BICUBIC,
     "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet",
 }
 
@@ -69,6 +68,7 @@ def _efficientnet(
     **_COMMON_META,
     "architecture": "EfficientNet",
     "publication_year": 2019,
+    "interpolation": InterpolationMode.BICUBIC,
     "min_size": (1, 1),
 }
 
@@ -77,6 +77,7 @@ def _efficientnet(
     **_COMMON_META,
     "architecture": "EfficientNetV2",
     "publication_year": 2021,
+    "interpolation": InterpolationMode.BILINEAR,
     "min_size": (33, 33),
 }
 
@@ -216,21 +217,19 @@ class EfficientNet_B7_Weights(WeightsEnum):
 
 class EfficientNet_V2_S_Weights(WeightsEnum):
     IMAGENET1K_V1 = Weights(
-        url="https://download.pytorch.org/models/efficientnet_v2_s-tmp.pth",
+        url="https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
         transforms=partial(
             ImageNetEval,
             crop_size=384,
             resize_size=384,
-            interpolation=InterpolationMode.BICUBIC,
-            mean=(0.5, 0.5, 0.5),
-            std=(0.5, 0.5, 0.5),
+            interpolation=InterpolationMode.BILINEAR,
         ),
         meta={
             **_COMMON_META_V2,
             "num_params": 21458488,
             "size": (384, 384),
-            "acc@1": 83.152,
-            "acc@5": 96.400,
+            "acc@1": 84.228,
+            "acc@5": 96.878,
         },
     )
     DEFAULT = IMAGENET1K_V1
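Under the prototype multi-weight API that this file belongs to, the enum above is consumed roughly as follows. This is a sketch assuming the `torchvision.prototype.models` namespace as it exists at this commit; `transforms()` returns the `ImageNetEval` preprocessing declared in the weights entry:

```python
import torch
from torchvision.prototype.models import EfficientNet_V2_S_Weights, efficientnet_v2_s

# Pick the weights entry updated in this commit; its transforms bundle the
# 384x384 bilinear ImageNetEval preprocessing.
weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1
model = efficientnet_v2_s(weights=weights)
model.eval()

preprocess = weights.transforms()
img = torch.randn(3, 500, 500)        # stand-in for a real image tensor
batch = preprocess(img).unsqueeze(0)  # resized/cropped to 384x384, normalized
with torch.no_grad():
    logits = model(batch)

print(weights.meta["acc@1"])  # 84.228, as recorded in the meta dict above
```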
