Merge dev-1.x to pruning #311

Merged (8 commits, Oct 10, 2022)
112 changes: 36 additions & 76 deletions .github/workflows/build.yml
@@ -121,79 +121,39 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false



test_cuda:
runs-on: ubuntu-18.04
container:
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
strategy:
matrix:
python-version: [3.7]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: pip install pip --upgrade
- name: Fetch GPG keys
run: |
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
- name: Install Python-dev
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
if: ${{matrix.python-version != 3.9}}
- name: Install system dependencies
run: |
apt-get update
apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
- name: Install mmrazor dependencies
run: |
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x
pip install -r requirements.txt
- name: Build and install
run: |
python setup.py check -m -s
TORCH_CUDA_ARCH_LIST=7.0 pip install -e .

test_windows:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-2022]
python: [3.7]
platform: [cpu]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: pip install pip --upgrade
- name: Install lmdb
run: pip install lmdb
- name: Install PyTorch
run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
- name: Install mmrazor dependencies
run: |
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x
pip install -r requirements.txt
- name: Build and install
run: |
pip install -e .
- name: Run unittests and generate coverage report
run: |
pytest tests/
# test_windows:
# runs-on: ${{ matrix.os }}
# strategy:
# matrix:
# os: [windows-2022]
# python: [3.7]
# platform: [cpu]
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Upgrade pip
# run: |
# pip install pip --upgrade
# pip install wheel
# - name: Install lmdb
# run: pip install lmdb
# - name: Install PyTorch
# run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
# - name: Install mmrazor dependencies
# run: |
# pip install git+https://github.com/open-mmlab/mmengine.git@main
# pip install -U openmim
# mim install 'mmcv >= 2.0.0rc1'
# pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
# pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
# pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x
# pip install -r requirements.txt
# - name: Build and install
# run: |
# pip install -e .
# - name: Run unittests and generate coverage report
# run: |
# pytest tests/
28 changes: 28 additions & 0 deletions configs/_base_/nas_backbones/dsnas_shufflenet_supernet.py
@@ -0,0 +1,28 @@
norm_cfg = dict(type='BN', eps=0.01)

_STAGE_MUTABLE = dict(
type='mmrazor.OneHotMutableOP',
fix_threshold=0.3,
candidates=dict(
shuffle_3x3=dict(
type='ShuffleBlock', kernel_size=3, norm_cfg=norm_cfg),
shuffle_5x5=dict(
type='ShuffleBlock', kernel_size=5, norm_cfg=norm_cfg),
shuffle_7x7=dict(
type='ShuffleBlock', kernel_size=7, norm_cfg=norm_cfg),
shuffle_xception=dict(type='ShuffleXception', norm_cfg=norm_cfg)))

arch_setting = [
# Parameters to build layers. 3 parameters are needed to construct a
# layer, from left to right: channel, num_blocks, mutable_cfg.
[64, 4, _STAGE_MUTABLE],
[160, 4, _STAGE_MUTABLE],
[320, 8, _STAGE_MUTABLE],
[640, 4, _STAGE_MUTABLE]
]

nas_backbone = dict(
type='mmrazor.SearchableShuffleNetV2',
widen_factor=1.0,
arch_setting=arch_setting,
norm_cfg=norm_cfg)
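
The comment above notes that each `arch_setting` entry is `(channel, num_blocks, mutable_cfg)`. As a hypothetical illustration only (not the actual `SearchableShuffleNetV2` code), such an entry could be expanded into a stage of searchable blocks as sketched below; the `out_channels` key name is an assumption.

```python
import copy


def expand_arch_setting(arch_setting):
    """Hypothetical helper: expand each (channel, num_blocks, mutable_cfg)
    entry into a list of per-block configs for one stage."""
    stages = []
    for out_channels, num_blocks, mutable_cfg in arch_setting:
        stage = []
        for _ in range(num_blocks):
            block_cfg = copy.deepcopy(mutable_cfg)    # one mutable per block
            block_cfg['out_channels'] = out_channels  # assumed key name
            stage.append(block_cfg)
        stages.append(stage)
    return stages
```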
102 changes: 102 additions & 0 deletions configs/_base_/settings/imagenet_bs1024_dsnas.py
@@ -0,0 +1,102 @@
# dataset settings
dataset_type = 'mmcls.ImageNet'
data_preprocessor = dict(
type='mmcls.ClsDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)

train_pipeline = [
dict(type='mmcls.LoadImageFromFile'),
dict(type='mmcls.RandomResizedCrop', scale=224),
dict(type='mmcls.RandomFlip', prob=0.5, direction='horizontal'),
dict(type='mmcls.PackClsInputs'),
]

test_pipeline = [
dict(type='mmcls.LoadImageFromFile'),
dict(type='mmcls.ResizeEdge', scale=256, edge='short'),
dict(type='mmcls.CenterCrop', crop_size=224),
dict(type='mmcls.PackClsInputs'),
]

train_dataloader = dict(
batch_size=128,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='mmcls.DefaultSampler', shuffle=True),
persistent_workers=True,
)

val_dataloader = dict(
batch_size=128,
num_workers=4,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='mmcls.DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='mmcls.Accuracy', topk=(1, 5))

# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

# optimizer
paramwise_cfg = dict(bias_decay_mult=0.0, norm_decay_mult=0.0)

optim_wrapper = dict(
constructor='mmrazor.SeparateOptimWrapperConstructor',
architecture=dict(
optimizer=dict(
type='mmcls.SGD', lr=0.5, momentum=0.9, weight_decay=4e-5),
paramwise_cfg=paramwise_cfg),
mutator=dict(
optimizer=dict(
type='mmcls.Adam', lr=0.001, weight_decay=0.0, betas=(0.5,
0.999))))

search_epochs = 85
# learning policy
param_scheduler = dict(
architecture=[
dict(
type='mmcls.LinearLR',
end=5,
start_factor=0.2,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='mmcls.CosineAnnealingLR',
T_max=240,
begin=5,
end=search_epochs,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='mmcls.CosineAnnealingLR',
T_max=160,
begin=search_epochs,
end=240,
eta_min=0.0,
by_epoch=True,
convert_to_iter_based=True)
],
mutator=[])

# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=240)
val_cfg = dict()
test_cfg = dict()
30 changes: 30 additions & 0 deletions configs/distill/mmcls/crd/README.md
@@ -0,0 +1,30 @@
# CONTRASTIVE REPRESENTATION DISTILLATION

> [CONTRASTIVE REPRESENTATION DISTILLATION](https://arxiv.org/abs/1910.10699)

## Abstract

Often we wish to transfer representational knowledge from one neural network to another. Examples include distilling a large network into a smaller one, transferring knowledge from one sensory modality to a second, or ensembling a collection of models into a single estimator. Knowledge distillation, the standard approach to these problems, minimizes the KL divergence between the probabilistic outputs of a teacher and student network. We demonstrate that this objective ignores important structural knowledge of the teacher network. This motivates an alternative objective by which we train a student to capture significantly more information in the teacher’s representation of the data. We formulate this objective as contrastive learning. Experiments demonstrate that our resulting new objective outperforms knowledge distillation and other cutting-edge distillers on a variety of knowledge transfer tasks, including single model compression, ensemble distillation, and cross-modal transfer. Our method sets a new state-of-the-art in many transfer tasks, and sometimes even outperforms the teacher network when combined with knowledge distillation. [Original code](http://github.com/HobbitLong/RepDistiller)

![pipeline](../../../../docs/en/imgs/model_zoo/crd/pipeline.jpg)
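
For intuition, the following is a minimal in-batch sketch of such a contrastive objective in plain PyTorch. It is only illustrative: the `temperature` value and the use of in-batch negatives are assumptions, whereas the `CRDLoss` used in this PR draws `neg_num` negatives from a memory bank (see `crd_neck_r50_r18_8xb16_cifar10.py` below).

```python
import torch
import torch.nn.functional as F


def contrastive_distill_loss(s_feats: torch.Tensor,
                             t_feats: torch.Tensor,
                             temperature: float = 0.07) -> torch.Tensor:
    """Treat the teacher embedding of the same image as the positive and the
    other samples in the batch as negatives (an in-batch InfoNCE objective)."""
    s = F.normalize(s_feats, dim=1)            # (N, D) student projections
    t = F.normalize(t_feats, dim=1)            # (N, D) teacher projections
    logits = s @ t.t() / temperature           # (N, N) pairwise similarities
    targets = torch.arange(s.size(0), device=s.device)
    return F.cross_entropy(logits, targets)    # positives lie on the diagonal


# Example with 128-d projections (the CRDConnector dim_out used in the config)
loss = contrastive_distill_loss(torch.randn(16, 128), torch.randn(16, 128))
```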

## Citation

```latex
@article{tian2019contrastive,
title={Contrastive representation distillation},
author={Tian, Yonglong and Krishnan, Dilip and Isola, Phillip},
journal={arXiv preprint arXiv:1910.10699},
year={2019}
}
```

## Results and models

| Dataset | Model | Teacher | Top-1 (%) | Top-5 (%) | Configs | Download |
| ------- | --------- | --------- | --------- | --------- | ------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
| CIFAR10 | ResNet-18 | ResNet-50 | 94.79 | 99.86 | [config](crd_neck_r50_r18_8xb16_cifar10.py) | [teacher](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) \|[model](<>) \| [log](<>) |

## Acknowledgement

Shout out to @chengshuang18 for his special contribution.
108 changes: 108 additions & 0 deletions configs/distill/mmcls/crd/crd_neck_r50_r18_8xb16_cifar10.py
@@ -0,0 +1,108 @@
_base_ = [
'mmcls::_base_/datasets/cifar10_bs16.py',
'mmcls::_base_/schedules/cifar10_bs128.py',
'mmcls::_base_/default_runtime.py'
]

model = dict(
_scope_='mmrazor',
type='SingleTeacherDistill',
data_preprocessor=dict(
type='ImgDataPreprocessor',
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
bgr_to_rgb=True),
architecture=dict(
cfg_path='mmcls::resnet/resnet18_8xb16_cifar10.py', pretrained=False),
teacher=dict(
cfg_path='mmcls::resnet/resnet50_8xb16_cifar10.py', pretrained=True),
teacher_ckpt='resnet50_b16x8_cifar10_20210528-f54bfad9.pth',
distiller=dict(
type='ConfigurableDistiller',
student_recorders=dict(
neck=dict(type='ModuleOutputs', source='neck.gap'),
data_samples=dict(type='ModuleInputs', source='')),
teacher_recorders=dict(
neck=dict(type='ModuleOutputs', source='neck.gap')),
distill_losses=dict(loss_crd=dict(type='CRDLoss', loss_weight=0.8)),
connectors=dict(
loss_crd_stu=dict(type='CRDConnector', dim_in=512, dim_out=128),
loss_crd_tea=dict(type='CRDConnector', dim_in=2048, dim_out=128)),
loss_forward_mappings=dict(
loss_crd=dict(
s_feats=dict(
from_student=True,
recorder='neck',
connector='loss_crd_stu'),
t_feats=dict(
from_student=False,
recorder='neck',
connector='loss_crd_tea'),
data_samples=dict(
from_student=True, recorder='data_samples', data_idx=1)))))

find_unused_parameters = True

val_cfg = dict(_delete_=True, type='mmrazor.SingleTeacherDistillValLoop')

# change `CIFAR10` dataset to `CRDDataset` dataset.
dataset_type = 'CIFAR10'
train_pipeline = [
dict(_scope_='mmcls', type='RandomCrop', crop_size=32, padding=4),
dict(_scope_='mmcls', type='RandomFlip', prob=0.5, direction='horizontal'),
dict(_scope_='mmrazor', type='PackCRDClsInputs'),
]

test_pipeline = [
dict(_scope_='mmrazor', type='PackCRDClsInputs'),
]

ori_train_dataset = dict(
_scope_='mmcls',
type=dataset_type,
data_prefix='data/cifar10',
test_mode=False,
pipeline=train_pipeline)

crd_train_dataset = dict(
_scope_='mmrazor',
type='CRDDataset',
dataset=ori_train_dataset,
neg_num=16384,
sample_mode='exact',
percent=1.0)

ori_test_dataset = dict(
_scope_='mmcls',
type=dataset_type,
data_prefix='data/cifar10/',
test_mode=True,
pipeline=test_pipeline)

crd_test_dataset = dict(
_scope_='mmrazor',
type='CRDDataset',
dataset=ori_test_dataset,
neg_num=16384,
sample_mode='exact',
percent=1.0)

train_dataloader = dict(
_delete_=True,
batch_size=16,
num_workers=2,
dataset=crd_train_dataset,
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)

val_dataloader = dict(
_delete_=True,
batch_size=16,
num_workers=2,
dataset=crd_test_dataset,
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)