-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Tools] Add vit/swin/mit convert weight scripts (#783)
* init scripts * update markdown * update markdown * add docs * delete mit converter and use torch load function * rename segformer readme * update doc * modify doc * 更新中文文档 * Update useful_tools.md * Update useful_tools.md * modify doc * update segformer.yml
- Loading branch information
谢昕辰
authored
Aug 18, 2021
1 parent
441be4e
commit f72727c
Showing
8 changed files
with
381 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
Collections: | ||
- Metadata: | ||
Training Data: | ||
- ADE20k | ||
Name: segformer | ||
Models: | ||
- Config: configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B0 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 19.49 | ||
lr schd: 160000 | ||
memory (GB): 2.1 | ||
Name: segformer_mit-b0_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 37.41 | ||
mIoU(ms+flip): 38.34 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20210726_101530-8ffa8fda.pth | ||
- Config: configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B1 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 20.98 | ||
lr schd: 160000 | ||
memory (GB): 2.6 | ||
Name: segformer_mit-b1_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 40.97 | ||
mIoU(ms+flip): 42.54 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20210726_112106-d70e859d.pth | ||
- Config: configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B2 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 32.38 | ||
lr schd: 160000 | ||
memory (GB): 3.6 | ||
Name: segformer_mit-b2_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 45.58 | ||
mIoU(ms+flip): 47.03 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20210726_112103-cbd414ac.pth | ||
- Config: configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B3 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 45.23 | ||
lr schd: 160000 | ||
memory (GB): 4.8 | ||
Name: segformer_mit-b3_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 47.82 | ||
mIoU(ms+flip): 48.81 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20210726_081410-962b98d2.pth | ||
- Config: configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B4 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 64.72 | ||
lr schd: 160000 | ||
memory (GB): 6.1 | ||
Name: segformer_mit-b4_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 48.46 | ||
mIoU(ms+flip): 49.76 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20210728_183055-7f509d7d.pth | ||
- Config: configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B5 | ||
crop size: (512,512) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (512,512) | ||
value: 84.1 | ||
lr schd: 160000 | ||
memory (GB): 7.2 | ||
Name: segformer_mit-b5_512x512_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 49.13 | ||
mIoU(ms+flip): 50.22 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth | ||
- Config: configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py | ||
In Collection: segformer | ||
Metadata: | ||
backbone: MIT-B5 | ||
crop size: (640,640) | ||
inference time (ms/im): | ||
- backend: PyTorch | ||
batch size: 1 | ||
hardware: V100 | ||
mode: FP32 | ||
resolution: (640,640) | ||
value: 88.5 | ||
lr schd: 160000 | ||
memory (GB): 11.5 | ||
Name: segformer_mit-b5_640x640_160k_ade20k | ||
Results: | ||
Dataset: ADE20k | ||
Metrics: | ||
mIoU: 49.62 | ||
mIoU(ms+flip): 50.36 | ||
Task: Semantic Segmentation | ||
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20210801_121243-41d2845b.pth |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import argparse | ||
from collections import OrderedDict | ||
|
||
import torch | ||
|
||
|
||
def convert_swin(ckpt):
    """Translate a Swin state dict into MMSegmentation key names.

    Keys starting with ``head`` are dropped. Keys starting with ``layers``
    are renamed to ``stages`` (attention and MLP sub-keys are renamed as
    well), and ``proj`` in ``patch_embed`` keys becomes ``projection``. All
    other keys are copied unchanged. The ``reduction.`` and ``norm.`` values
    of ``downsample`` keys are additionally re-ordered along their input
    channels.

    Args:
        ckpt (dict): Mapping from parameter name to value, as found in the
            source checkpoint.

    Returns:
        OrderedDict: The converted mapping, in the iteration order of
        ``ckpt``.
    """
    # Checked in order: the specific fc1/fc2 names must win over the generic
    # 'mlp.' fallback.
    mlp_renames = (
        ('mlp.fc1.', 'ffn.layers.0.0.'),
        ('mlp.fc2.', 'ffn.layers.1.'),
        ('mlp.', 'ffn.'),
    )
    group_order = [0, 2, 1, 3]

    def correct_unfold_reduction_order(x):
        # Split the input channels of a 2-D value into 4 groups, swap the
        # two middle groups, then interleave the groups back into one axis.
        # NOTE(review): presumably aligns the official concat order with an
        # unfold-based patch merging layer - confirm against the model code.
        rows, cols = x.shape
        grouped = x.reshape(rows, 4, cols // 4)
        swapped = grouped[:, group_order, :]
        return swapped.transpose(1, 2).reshape(rows, cols)

    def correct_unfold_norm_order(x):
        # Same channel re-ordering as above, applied to a 1-D value.
        size = x.shape[0]
        grouped = x.reshape(4, size // 4)
        return grouped[group_order, :].transpose(0, 1).reshape(size)

    def convert_stage_entry(name, value):
        # Handle one 'layers...' entry; returns the (new_name, new_value).
        if 'attn.' in name:
            renamed = name.replace('attn.', 'attn.w_msa.')
        elif 'mlp.' in name:
            old, new = next(
                pair for pair in mlp_renames if pair[0] in name)
            renamed = name.replace(old, new)
        else:
            renamed = name
            if 'downsample' in name:
                if 'reduction.' in name:
                    value = correct_unfold_reduction_order(value)
                elif 'norm.' in name:
                    value = correct_unfold_norm_order(value)
        # Only the leading 'layers' prefix is renamed.
        return renamed.replace('layers', 'stages', 1), value

    converted = OrderedDict()
    for name, value in ckpt.items():
        if name.startswith('head'):
            # The 'head...' entries are not carried over.
            continue
        if name.startswith('layers'):
            name, value = convert_stage_entry(name, value)
        elif name.startswith('patch_embed'):
            # A no-op when 'proj' does not occur in the key.
            name = name.replace('proj', 'projection')
        converted[name] = value

    return converted
|
||
|
||
def main():
    """Convert a Swin checkpoint file from the command line.

    Reads the checkpoint at the ``src`` argument, takes its ``state_dict``
    entry if present, otherwise its ``model`` entry, otherwise the loaded
    object itself, converts the keys with ``convert_swin`` and saves the
    result to the ``dst`` argument. The parent directory of ``dst`` is
    created when it does not exist yet.
    """
    # Local import keeps this block self-contained.
    import os

    parser = argparse.ArgumentParser(
        # The trailing space matters: adjacent literals are concatenated.
        description='Convert keys in official pretrained swin models to '
        'MMSegmentation style.')
    parser.add_argument('src', help='src segmentation model path')
    # The dst path must be a full path of the new checkpoint.
    parser.add_argument('dst', help='save path')
    args = parser.parse_args()

    # NOTE(review): torch.load unpickles its input; only run this on
    # checkpoints from a trusted source.
    checkpoint = torch.load(args.src, map_location='cpu')
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    elif 'model' in checkpoint:
        state_dict = checkpoint['model']
    else:
        state_dict = checkpoint
    weight = convert_swin(state_dict)

    # open() does not create missing directories; a bare file name has an
    # empty dirname, which os.makedirs would reject.
    dst_dir = os.path.dirname(args.dst)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    with open(args.dst, 'wb') as f:
        torch.save(weight, f)


if __name__ == '__main__':
    main()
Oops, something went wrong.