From e5277b6cec59a5d51279225edcac502e5ec020f2 Mon Sep 17 00:00:00 2001
From: MengzhangLI
Date: Tue, 8 Mar 2022 16:40:20 +0800
Subject: [PATCH] [Feature] Provide URLs of STDC, Segmenter and Twins pretrained models

---
 configs/_base_/models/segmenter_vit-b16_mask.py             | 3 ++-
 configs/_base_/models/twins_pcpvt-s_fpn.py                  | 5 +++--
 configs/_base_/models/twins_pcpvt-s_upernet.py              | 5 +++--
 configs/segmenter/README.md                                 | 4 ++--
 .../segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py         | 3 ++-
 .../segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py         | 4 +++-
 .../segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py         | 4 +++-
 configs/stdc/README.md                                      | 4 ++--
 configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py      | 4 ++--
 configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py      | 4 ++--
 configs/twins/README.md                                     | 4 ++--
 .../twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py     | 5 +++--
 .../twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py | 5 +++--
 .../twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py     | 5 +++--
 .../twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py | 6 ++++--
 .../twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py | 5 +++--
 .../twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py   | 6 ++++--
 .../twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py | 5 +++--
 .../twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py   | 6 ++++--
 .../twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py | 6 ++++--
 .../twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py   | 6 ++++--
 21 files changed, 61 insertions(+), 38 deletions(-)

diff --git a/configs/_base_/models/segmenter_vit-b16_mask.py b/configs/_base_/models/segmenter_vit-b16_mask.py
index 967a65c200..622f1228a4 100644
--- a/configs/_base_/models/segmenter_vit-b16_mask.py
+++ b/configs/_base_/models/segmenter_vit-b16_mask.py
@@ -1,8 +1,9 @@
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth'  # noqa
 # model settings
 backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
 model = dict(
     type='EncoderDecoder',
-    pretrained='pretrain/vit_base_p16_384.pth',
+    pretrained=checkpoint,
     backbone=dict(
         type='VisionTransformer',
         img_size=(512, 512),
diff --git a/configs/_base_/models/twins_pcpvt-s_fpn.py b/configs/_base_/models/twins_pcpvt-s_fpn.py
index e7722759b8..0f4488a75a 100644
--- a/configs/_base_/models/twins_pcpvt-s_fpn.py
+++ b/configs/_base_/models/twins_pcpvt-s_fpn.py
@@ -1,3 +1,5 @@
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa
+
 # model settings
 backbone_norm_cfg = dict(type='LN')
 norm_cfg = dict(type='SyncBN', requires_grad=True)
@@ -5,8 +7,7 @@
     type='EncoderDecoder',
     backbone=dict(
         type='PCPVT',
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_small.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         in_channels=3,
         embed_dims=[64, 128, 320, 512],
         num_heads=[1, 2, 5, 8],
diff --git a/configs/_base_/models/twins_pcpvt-s_upernet.py b/configs/_base_/models/twins_pcpvt-s_upernet.py
index a48e1a9534..14a74b988d 100644
--- a/configs/_base_/models/twins_pcpvt-s_upernet.py
+++ b/configs/_base_/models/twins_pcpvt-s_upernet.py
@@ -1,3 +1,5 @@
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa
+
 # model settings
 backbone_norm_cfg = dict(type='LN')
 norm_cfg = dict(type='SyncBN', requires_grad=True)
@@ -5,8 +7,7 @@
     type='EncoderDecoder',
     backbone=dict(
         type='PCPVT',
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_small.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         in_channels=3,
         embed_dims=[64, 128, 320, 512],
         num_heads=[1, 2, 5, 8],
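The hunks above show the core pattern of this patch: a hard-coded local path such as `pretrained/pcpvt_small.pth` becomes a module-level `checkpoint` URL consumed by `init_cfg=dict(type='Pretrained', ...)` or `pretrained=`. A minimal sketch of why a bare URL works there, assuming mmcv's `load_checkpoint` (which, to my understanding, is what the `Pretrained` initializer routes through); only the URL below comes from this patch, the rest is illustrative:

```python
# A minimal sketch, not part of the patch: mmcv's checkpoint loader accepts
# http(s):// filenames and caches the download via torch.hub, which is why a
# plain URL can replace a local path inside init_cfg.
import torch.nn as nn
from mmcv.runner import load_checkpoint

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa

# Stand-in module for the sketch; with the real PCPVT backbone the converted
# keys match one-to-one, so strict=False only matters in this toy example.
model = nn.Linear(8, 8)
state = load_checkpoint(model, checkpoint, map_location='cpu', strict=False)
```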
diff --git a/configs/segmenter/README.md b/configs/segmenter/README.md
index 29ba260a9e..c072956fdc 100644
--- a/configs/segmenter/README.md
+++ b/configs/segmenter/README.md
@@ -33,9 +33,9 @@ Image segmentation is often ambiguous at the level of individual image patches a
 
 ## Usage
 
-To use the pre-trained ViT model from [Segmenter](https://github.com/rstrudel/segmenter), it is necessary to convert keys.
+We have provided pretrained models converted from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106).
 
-We provide a script [`vitjax2mmseg.py`](../../tools/model_converters/vitjax2mmseg.py) in the tools directory to convert the key of models from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) to MMSegmentation style.
+If you want to convert keys on your own to use the pre-trained ViT model from [Segmenter](https://github.com/rstrudel/segmenter), we also provide a script [`vitjax2mmseg.py`](../../tools/model_converters/vitjax2mmseg.py) in the tools directory to convert the keys of models from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) to MMSegmentation style.
 
 ```shell
 python tools/model_converters/vitjax2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH}
diff --git a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
index 0ed004e55c..718657093c 100644
--- a/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-l_mask_8x1_512x512_160k_ade20k.py
@@ -3,9 +3,10 @@
     '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py',
     '../_base_/schedules/schedule_160k.py'
 ]
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_large_p16_384_20220308-d4efb41d.pth'  # noqa
 
 model = dict(
-    pretrained='pretrain/vit_large_p16_384.pth',
+    pretrained=checkpoint,
     backbone=dict(
         type='VisionTransformer',
         img_size=(640, 640),
diff --git a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
index 8455ebe1da..7e0eeb1be4 100644
--- a/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py
@@ -4,9 +4,11 @@
     '../_base_/schedules/schedule_160k.py'
 ]
 
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_small_p16_384_20220308-410f6037.pth'  # noqa
+
 backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
 model = dict(
-    pretrained='pretrain/vit_small_p16_384.pth',
+    pretrained=checkpoint,
     backbone=dict(
         img_size=(512, 512),
         embed_dims=384,
diff --git a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
index c9332fe8e5..ec0107d550 100644
--- a/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
+++ b/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py
@@ -4,8 +4,10 @@
     '../_base_/schedules/schedule_160k.py'
 ]
 
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_tiny_p16_384_20220308-cce8c795.pth'  # noqa
+
 model = dict(
-    pretrained='pretrain/vit_tiny_p16_384.pth',
+    pretrained=checkpoint,
     backbone=dict(embed_dims=192, num_heads=3),
     decode_head=dict(
         type='SegmenterMaskTransformerHead',
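Since the Segmenter configs carry the URL in `pretrained=`, nothing else should need to change for users: building the model and initializing its weights triggers the download. A short sketch under that assumption, using mmseg's `build_segmentor` and one of the configs touched above (path relative to the repo root):

```python
# Sketch, not part of the patch: with the URL baked into the config,
# init_weights() fetches the ViT backbone weights over HTTP on first use.
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile(
    'configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py')
model = build_segmentor(cfg.model)
model.init_weights()  # downloads vit_tiny_p16_384_20220308-cce8c795.pth
```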
diff --git a/configs/stdc/README.md b/configs/stdc/README.md
index ca5886af59..466a91a5e0 100644
--- a/configs/stdc/README.md
+++ b/configs/stdc/README.md
@@ -35,9 +35,9 @@ BiSeNet has been proved to be a popular two-stream network for real-time segment
 
 ## Usage
 
-To use original repositories' [ImageNet Pretrained STDCNet Weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) , it is necessary to convert keys.
+We have provided pretrained models converted from the [ImageNet Pretrained STDCNet Weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) of the [official repo](https://github.com/MichaelFan01/STDC-Seg).
 
-We provide a script [`stdc2mmseg.py`](../../tools/model_converters/stdc2mmseg.py) in the tools directory to convert the key of models from [the official repo](https://github.com/MichaelFan01/STDC-Seg) to MMSegmentation style.
+If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`stdc2mmseg.py`](../../tools/model_converters/stdc2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/MichaelFan01/STDC-Seg) to MMSegmentation style.
 
 ```shell
 python tools/model_converters/stdc2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${STDC_TYPE}
diff --git a/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py b/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py
index 4845b4dc87..f295bf494e 100644
--- a/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py
+++ b/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py
@@ -1,6 +1,6 @@
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc1_20220308-5368626c.pth'  # noqa
 _base_ = './stdc1_512x1024_80k_cityscapes.py'
 model = dict(
     backbone=dict(
         backbone_cfg=dict(
-            init_cfg=dict(
-                type='Pretrained', checkpoint='./pretrained/stdc1.pth'))))
+            init_cfg=dict(type='Pretrained', checkpoint=checkpoint))))
diff --git a/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py b/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py
index 17c0b15ca7..4148ac4fd0 100644
--- a/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py
+++ b/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py
@@ -1,6 +1,6 @@
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc2_20220308-7dbd9127.pth'  # noqa
 _base_ = './stdc2_512x1024_80k_cityscapes.py'
 model = dict(
     backbone=dict(
         backbone_cfg=dict(
-            init_cfg=dict(
-                type='Pretrained', checkpoint='./pretrained/stdc2.pth'))))
+            init_cfg=dict(type='Pretrained', checkpoint=checkpoint))))
diff --git a/configs/twins/README.md b/configs/twins/README.md
index 0ecb79d018..e2218352eb 100644
--- a/configs/twins/README.md
+++ b/configs/twins/README.md
@@ -34,9 +34,9 @@ Very recently, a variety of vision transformer architectures for dense predictio
 
 ## Usage
 
-To use other repositories' pre-trained models, it is necessary to convert keys.
+We have provided pretrained models converted from the [official repo](https://github.com/Meituan-AutoML/Twins).
 
-We provide a script [`twins2mmseg.py`](../../tools/model_converters/twins2mmseg.py) in the tools directory to convert the key of models from [the official repo](https://github.com/Meituan-AutoML/Twins) to MMSegmentation style.
+If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`twins2mmseg.py`](../../tools/model_converters/twins2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/Meituan-AutoML/Twins) to MMSegmentation style.
 
 ```shell
 python tools/model_converters/twins2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${MODEL_TYPE}
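Whether a checkpoint comes from these hosted URLs or from a local run of `stdc2mmseg.py`/`twins2mmseg.py`, a quick look at its key layout can catch a conversion mistake before a `Pretrained` init silently skips weights. A sketch using plain PyTorch; the unwrapping step is a guess that covers both flat and `'state_dict'`-wrapped files:

```python
# Sketch, not part of the patch: download one of the hosted checkpoints and
# inspect its keys to confirm MMSegmentation-style naming before training.
import torch

url = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth'  # noqa
ckpt = torch.hub.load_state_dict_from_url(url, map_location='cpu')
# Some checkpoints nest weights under a 'state_dict' entry; unwrap if so.
state = ckpt.get('state_dict', ckpt)
print(list(state)[:5])  # first few parameter names in the converted layout
```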
diff --git a/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
index 1da668a99e..b79fefd4a5 100644
--- a/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
+++ b/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
@@ -1,7 +1,8 @@
 _base_ = ['./twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py']
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_base.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         depths=[3, 4, 18, 3]), )
diff --git a/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
index 95f0bd4108..8c299d32a2 100644
--- a/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py
@@ -1,9 +1,10 @@
 _base_ = ['./twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py']
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_base.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         depths=[3, 4, 18, 3],
         drop_path_rate=0.3))
diff --git a/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
index e8fcd9326e..abb652e8e0 100644
--- a/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
+++ b/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
@@ -1,7 +1,8 @@
 _base_ = ['./twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py']
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_large.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         depths=[3, 8, 27, 3]))
diff --git a/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
index 90307ee3a3..f6f7d2771c 100644
--- a/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py
@@ -1,8 +1,10 @@
 _base_ = ['./twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py']
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/pcpvt_large.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         depths=[3, 8, 27, 3],
         drop_path_rate=0.3))
diff --git a/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
index a6484cf7f7..00d89572c6 100644
--- a/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
+++ b/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py
@@ -1,9 +1,10 @@
 _base_ = ['./twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py']
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_base.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[96, 192, 384, 768],
         num_heads=[3, 6, 12, 24],
         depths=[2, 2, 18, 2]),
diff --git a/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py
index 7c2ffce95f..a969fedfed 100644
--- a/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py
@@ -1,8 +1,10 @@
 _base_ = ['./twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py']
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_base.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[96, 192, 384, 768],
         num_heads=[3, 6, 12, 24],
         depths=[2, 2, 18, 2]),
diff --git a/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
index 8ec0ed3ffa..c68bfd4a17 100644
--- a/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
+++ b/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py
@@ -1,9 +1,10 @@
 _base_ = ['./twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py']
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_large.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[128, 256, 512, 1024],
         num_heads=[4, 8, 16, 32],
         depths=[2, 2, 18, 2],
diff --git a/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py
index aba31532d9..f98c070b2d 100644
--- a/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py
@@ -1,8 +1,10 @@
 _base_ = ['./twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py']
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth'  # noqa
+
 model = dict(
     backbone=dict(
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_large.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[128, 256, 512, 1024],
         num_heads=[4, 8, 16, 32],
         depths=[2, 2, 18, 2],
diff --git a/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py
index dd4ef77659..dbb944c20f 100644
--- a/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py
+++ b/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py
@@ -2,11 +2,13 @@
     '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py',
     '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
 ]
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth'  # noqa
+
 model = dict(
     backbone=dict(
         type='SVT',
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_small.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[64, 128, 256, 512],
         num_heads=[2, 4, 8, 16],
         mlp_ratios=[4, 4, 4, 4],
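Because each `checkpoint` is now an ordinary config value, it can still be pointed back at a locally converted file (for example the output of `twins2mmseg.py`) without editing the config on disk. A sketch with a hypothetical local path; in mmseg versions of this era the same override should also be reachable from the training CLI via `--cfg-options model.backbone.init_cfg.checkpoint=...`:

```python
# Sketch, not part of the patch: swap the hosted URL for a local checkpoint.
from mmcv import Config

cfg = Config.fromfile(
    'configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py')
# './pretrained/alt_gvt_small.pth' is a hypothetical, locally converted file.
cfg.model.backbone.init_cfg.checkpoint = './pretrained/alt_gvt_small.pth'
```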
diff --git a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
index 05948391e4..44bf60b350 100644
--- a/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
+++ b/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py
@@ -3,11 +3,13 @@
     '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py',
     '../_base_/schedules/schedule_160k.py'
 ]
+
+checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth'  # noqa
+
 model = dict(
     backbone=dict(
         type='SVT',
-        init_cfg=dict(
-            type='Pretrained', checkpoint='pretrained/alt_gvt_small.pth'),
+        init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
         embed_dims=[64, 128, 256, 512],
         num_heads=[2, 4, 8, 16],
         mlp_ratios=[4, 4, 4, 4],