From cb5acfeacde6dccd616c9924a22502a1a426738c Mon Sep 17 00:00:00 2001
From: Dayan-Guan <75825029+Dayan-Guan@users.noreply.github.com>
Date: Sat, 19 Mar 2022 16:48:26 +0800
Subject: [PATCH] Delete models directory

---
 models/modeling/aspp.py                       |  98 -----
 models/modeling/backbone/__init__.py          |  24 --
 models/modeling/backbone/drn.py               | 405 ------
 models/modeling/backbone/mobilenet.py         | 154 -------
 models/modeling/backbone/resnet.py            | 191 ---------
 .../modeling/backbone/resnet_AuxLayers234.py  | 217 ----------
 models/modeling/backbone/xception.py          | 291 -------------
 .../backbones/get_pretrained_model.sh         |   6 -
 models/modeling/backbones/module_helper.py    | 178 --------
 models/modeling/backbones/resnet_backbone.py  | 202 ---------
 models/modeling/backbones/resnet_models.py    | 269 ------------
 models/modeling/decoder.py                    |  60 ---
 models/modeling/deeplab.py                    |  83 ----
 13 files changed, 2178 deletions(-)
 delete mode 100644 models/modeling/aspp.py
 delete mode 100644 models/modeling/backbone/__init__.py
 delete mode 100644 models/modeling/backbone/drn.py
 delete mode 100644 models/modeling/backbone/mobilenet.py
 delete mode 100644 models/modeling/backbone/resnet.py
 delete mode 100644 models/modeling/backbone/resnet_AuxLayers234.py
 delete mode 100644 models/modeling/backbone/xception.py
 delete mode 100644 models/modeling/backbones/get_pretrained_model.sh
 delete mode 100644 models/modeling/backbones/module_helper.py
 delete mode 100644 models/modeling/backbones/resnet_backbone.py
 delete mode 100644 models/modeling/backbones/resnet_models.py
 delete mode 100644 models/modeling/decoder.py
 delete mode 100644 models/modeling/deeplab.py

diff --git a/models/modeling/aspp.py b/models/modeling/aspp.py
deleted file mode 100644
index 84d9f3c..0000000
--- a/models/modeling/aspp.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
-
-class _ASPPModule(nn.Module):
-    def __init__(self, inplanes, planes, kernel_size, padding, dilation, BatchNorm):
-        super(_ASPPModule, self).__init__()
-        self.atrous_conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
-                                     stride=1, padding=padding, dilation=dilation, bias=False)
-        self.bn = BatchNorm(planes)
-        self.relu = nn.ReLU()
-
-        self._init_weight()
-
-    def forward(self, x):
-        x = self.atrous_conv(x)
-        x = self.bn(x)
-
-        return self.relu(x)
-
-    def _init_weight(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                torch.nn.init.kaiming_normal_(m.weight)
-            elif isinstance(m, SynchronizedBatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-class ASPP(nn.Module):
-    def __init__(self, backbone, output_stride, BatchNorm):
-        super(ASPP, self).__init__()
-        if backbone == 'drn':
-            inplanes = 512
-        elif backbone == 'mobilenet':
-            inplanes = 320
-        else:
-            inplanes = 2048
-        if output_stride == 16:
-            dilations = [1, 6, 12, 18]
-        elif output_stride == 8:
-            dilations = [1, 12, 24, 36]
-        else:
-            raise NotImplementedError
-
-        self.aspp1 = _ASPPModule(inplanes, 256, 1, padding=0, dilation=dilations[0], BatchNorm=BatchNorm)
-        self.aspp2 = _ASPPModule(inplanes, 256, 3, padding=dilations[1], dilation=dilations[1], BatchNorm=BatchNorm)
-        self.aspp3 = _ASPPModule(inplanes, 256, 3, padding=dilations[2], dilation=dilations[2], BatchNorm=BatchNorm)
-        self.aspp4 = _ASPPModule(inplanes, 256, 3, padding=dilations[3], dilation=dilations[3],
BatchNorm=BatchNorm) - - self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), - nn.Conv2d(inplanes, 256, 1, stride=1, bias=False), - BatchNorm(256), - nn.ReLU()) - self.conv1 = nn.Conv2d(1280, 256, 1, bias=False) - self.bn1 = BatchNorm(256) - self.relu = nn.ReLU() - self.dropout = nn.Dropout(0.5) - self._init_weight() - - def forward(self, x): - x1 = self.aspp1(x) - x2 = self.aspp2(x) - x3 = self.aspp3(x) - x4 = self.aspp4(x) - x5 = self.global_avg_pool(x) - x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True) - x = torch.cat((x1, x2, x3, x4, x5), dim=1) - - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - - return self.dropout(x) - - def _init_weight(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - # m.weight.data.normal_(0, math.sqrt(2. / n)) - torch.nn.init.kaiming_normal_(m.weight) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.SyncBatchNorm): - m.weight.data.fill_(1) - m.bias.data.zero_() - - -def build_aspp(backbone, output_stride, BatchNorm): - return ASPP(backbone, output_stride, BatchNorm) \ No newline at end of file diff --git a/models/modeling/backbone/__init__.py b/models/modeling/backbone/__init__.py deleted file mode 100644 index abcc789..0000000 --- a/models/modeling/backbone/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from models.modeling.backbone import resnet, xception, drn, mobilenet -from models.modeling.backbone import resnet_AuxLayers234 - -def build_backbone(backbone, output_stride, BatchNorm): - if backbone == 'resnet101': - return resnet.ResNet101(output_stride, BatchNorm) - elif backbone == 'resnet50': - return resnet.ResNet50(output_stride, BatchNorm) - elif backbone == 'xception': - return xception.AlignedXception(output_stride, BatchNorm) - elif backbone == 'drn': - return drn.drn_d_54(BatchNorm) - elif backbone == 'mobilenet': - return mobilenet.MobileNetV2(output_stride, BatchNorm) - else: - raise NotImplementedError - -def build_backbone_AuxLayers234(backbone, output_stride, BatchNorm): - if backbone == 'resnet101': - return resnet_AuxLayers234.ResNet101_AuxLayers234(output_stride, BatchNorm) - elif backbone == 'resnet50': - return resnet_AuxLayers234.ResNet50_AuxLayers234(output_stride, BatchNorm) - else: - raise NotImplementedError \ No newline at end of file diff --git a/models/modeling/backbone/drn.py b/models/modeling/backbone/drn.py deleted file mode 100644 index 1bc871d..0000000 --- a/models/modeling/backbone/drn.py +++ /dev/null @@ -1,405 +0,0 @@ -import torch.nn as nn -import math -import torch.utils.model_zoo as model_zoo -from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d - -webroot = 'http://dl.yf.io/drn/' - -model_urls = { - 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - 'drn-c-26': webroot + 'drn_c_26-ddedf421.pth', - 'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth', - 'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth', - 'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth', - 'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth', - 'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth', - 'drn-d-105': webroot + 'drn_d_105-12b40979.pth' -} - - -def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1): - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=padding, bias=False, dilation=dilation) - - -class 
BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None, - dilation=(1, 1), residual=True, BatchNorm=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride, - padding=dilation[0], dilation=dilation[0]) - self.bn1 = BatchNorm(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes, - padding=dilation[1], dilation=dilation[1]) - self.bn2 = BatchNorm(planes) - self.downsample = downsample - self.stride = stride - self.residual = residual - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - if self.residual: - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None, - dilation=(1, 1), residual=True, BatchNorm=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=dilation[1], bias=False, - dilation=dilation[1]) - self.bn2 = BatchNorm(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class DRN(nn.Module): - - def __init__(self, block, layers, arch='D', - channels=(16, 32, 64, 128, 256, 512, 512, 512), - BatchNorm=None): - super(DRN, self).__init__() - self.inplanes = channels[0] - self.out_dim = channels[-1] - self.arch = arch - - if arch == 'C': - self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1, - padding=3, bias=False) - self.bn1 = BatchNorm(channels[0]) - self.relu = nn.ReLU(inplace=True) - - self.layer1 = self._make_layer( - BasicBlock, channels[0], layers[0], stride=1, BatchNorm=BatchNorm) - self.layer2 = self._make_layer( - BasicBlock, channels[1], layers[1], stride=2, BatchNorm=BatchNorm) - - elif arch == 'D': - self.layer0 = nn.Sequential( - nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3, - bias=False), - BatchNorm(channels[0]), - nn.ReLU(inplace=True) - ) - - self.layer1 = self._make_conv_layers( - channels[0], layers[0], stride=1, BatchNorm=BatchNorm) - self.layer2 = self._make_conv_layers( - channels[1], layers[1], stride=2, BatchNorm=BatchNorm) - - self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2, BatchNorm=BatchNorm) - self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2, BatchNorm=BatchNorm) - self.layer5 = self._make_layer(block, channels[4], layers[4], - dilation=2, new_level=False, BatchNorm=BatchNorm) - self.layer6 = None if layers[5] == 0 else \ - self._make_layer(block, channels[5], layers[5], dilation=4, - new_level=False, BatchNorm=BatchNorm) - - if arch == 'C': - self.layer7 = None if layers[6] == 0 else \ - self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, - new_level=False, residual=False, BatchNorm=BatchNorm) - 
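-            # Note: layer7/layer8 step the dilation back down (2, then 1) to
-            # remove gridding artifacts; arch 'C' builds them from non-residual
-            # BasicBlocks, arch 'D' from plain conv stacks (_make_conv_layers).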
self.layer8 = None if layers[7] == 0 else \ - self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, - new_level=False, residual=False, BatchNorm=BatchNorm) - elif arch == 'D': - self.layer7 = None if layers[6] == 0 else \ - self._make_conv_layers(channels[6], layers[6], dilation=2, BatchNorm=BatchNorm) - self.layer8 = None if layers[7] == 0 else \ - self._make_conv_layers(channels[7], layers[7], dilation=1, BatchNorm=BatchNorm) - - self._init_weight() - - def _init_weight(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.SyncBatchNorm): - m.weight.data.fill_(1) - m.bias.data.zero_() - - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, - new_level=True, residual=True, BatchNorm=None): - assert dilation == 1 or dilation % 2 == 0 - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = list() - layers.append(block( - self.inplanes, planes, stride, downsample, - dilation=(1, 1) if dilation == 1 else ( - dilation // 2 if new_level else dilation, dilation), - residual=residual, BatchNorm=BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, residual=residual, - dilation=(dilation, dilation), BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def _make_conv_layers(self, channels, convs, stride=1, dilation=1, BatchNorm=None): - modules = [] - for i in range(convs): - modules.extend([ - nn.Conv2d(self.inplanes, channels, kernel_size=3, - stride=stride if i == 0 else 1, - padding=dilation, bias=False, dilation=dilation), - BatchNorm(channels), - nn.ReLU(inplace=True)]) - self.inplanes = channels - return nn.Sequential(*modules) - - def forward(self, x): - if self.arch == 'C': - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - elif self.arch == 'D': - x = self.layer0(x) - - x = self.layer1(x) - x = self.layer2(x) - - x = self.layer3(x) - low_level_feat = x - - x = self.layer4(x) - x = self.layer5(x) - - if self.layer6 is not None: - x = self.layer6(x) - - if self.layer7 is not None: - x = self.layer7(x) - - if self.layer8 is not None: - x = self.layer8(x) - - return x, low_level_feat - - -class DRN_A(nn.Module): - - def __init__(self, block, layers, BatchNorm=None): - self.inplanes = 64 - super(DRN_A, self).__init__() - self.out_dim = 512 * block.expansion - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = BatchNorm(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0], BatchNorm=BatchNorm) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2, BatchNorm=BatchNorm) - self.layer3 = self._make_layer(block, 256, layers[2], stride=1, - dilation=2, BatchNorm=BatchNorm) - self.layer4 = self._make_layer(block, 512, layers[3], stride=1, - dilation=4, BatchNorm=BatchNorm) - - self._init_weight() - - def _init_weight(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * 
m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample, BatchNorm=BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, - dilation=(dilation, dilation, ), BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - return x - -def drn_a_50(BatchNorm, pretrained=True): - model = DRN_A(Bottleneck, [3, 4, 6, 3], BatchNorm=BatchNorm) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) - return model - - -def drn_c_26(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-c-26']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_c_42(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-c-42']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_c_58(BatchNorm, pretrained=True): - model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-c-58']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_22(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-22']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_24(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-24']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_38(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-38']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_40(BatchNorm, pretrained=True): - model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-40']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_54(BatchNorm, 
pretrained=True): - model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-54']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - - -def drn_d_105(BatchNorm, pretrained=True): - model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', BatchNorm=BatchNorm) - if pretrained: - pretrained = model_zoo.load_url(model_urls['drn-d-105']) - del pretrained['fc.weight'] - del pretrained['fc.bias'] - model.load_state_dict(pretrained) - return model - -if __name__ == "__main__": - import torch - model = drn_a_50(BatchNorm=nn.BatchNorm2d, pretrained=True) - input = torch.rand(1, 3, 512, 512) - output, low_level_feat = model(input) - print(output.size()) - print(low_level_feat.size()) diff --git a/models/modeling/backbone/mobilenet.py b/models/modeling/backbone/mobilenet.py deleted file mode 100644 index 3e7a217..0000000 --- a/models/modeling/backbone/mobilenet.py +++ /dev/null @@ -1,154 +0,0 @@ -import torch -import torch.nn.functional as F -import torch.nn as nn -import math -from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d -import torch.utils.model_zoo as model_zoo - -def conv_bn(inp, oup, stride, BatchNorm): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - BatchNorm(oup), - nn.ReLU6(inplace=True) - ) - - -def fixed_padding(inputs, kernel_size, dilation): - kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1) - pad_total = kernel_size_effective - 1 - pad_beg = pad_total // 2 - pad_end = pad_total - pad_beg - padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end)) - return padded_inputs - - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = round(inp * expand_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - self.kernel_size = 3 - self.dilation = dilation - - if expand_ratio == 1: - self.conv = nn.Sequential( - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False), - BatchNorm(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, 1, bias=False), - BatchNorm(oup), - ) - else: - self.conv = nn.Sequential( - # pw - nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False), - BatchNorm(hidden_dim), - nn.ReLU6(inplace=True), - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False), - BatchNorm(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, 1, bias=False), - BatchNorm(oup), - ) - - def forward(self, x): - x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation) - if self.use_res_connect: - x = x + self.conv(x_pad) - else: - x = self.conv(x_pad) - return x - - -class MobileNetV2(nn.Module): - def __init__(self, output_stride=8, BatchNorm=None, width_mult=1., pretrained=True): - super(MobileNetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - current_stride = 1 - rate = 1 - interverted_residual_setting = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 320, 1, 1], - ] - - # building first layer - input_channel = int(input_channel * width_mult) - self.features = [conv_bn(3, input_channel, 2, BatchNorm)] - current_stride *= 2 
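-        # Note: once current_stride reaches the requested output_stride, the
-        # loop below stops downsampling; each block's nominal stride s is
-        # folded into the dilation rate instead, preserving resolution while
-        # the receptive field keeps growing.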
- # building inverted residual blocks - for t, c, n, s in interverted_residual_setting: - if current_stride == output_stride: - stride = 1 - dilation = rate - rate *= s - else: - stride = s - dilation = 1 - current_stride *= s - output_channel = int(c * width_mult) - for i in range(n): - if i == 0: - self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm)) - else: - self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm)) - input_channel = output_channel - self.features = nn.Sequential(*self.features) - self._initialize_weights() - - if pretrained: - self._load_pretrained_model() - - self.low_level_features = self.features[0:4] - self.high_level_features = self.features[4:] - - def forward(self, x): - low_level_feat = self.low_level_features(x) - x = self.high_level_features(low_level_feat) - return x, low_level_feat - - def _load_pretrained_model(self): - pretrain_dict = model_zoo.load_url('http://jeff95.me/models/mobilenet_v2-6a65762b.pth') - model_dict = {} - state_dict = self.state_dict() - for k, v in pretrain_dict.items(): - if k in state_dict: - model_dict[k] = v - state_dict.update(model_dict) - self.load_state_dict(state_dict) - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - # m.weight.data.normal_(0, math.sqrt(2. / n)) - torch.nn.init.kaiming_normal_(m.weight) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.SyncBatchNorm): - m.weight.data.fill_(1) - m.bias.data.zero_() - -if __name__ == "__main__": - input = torch.rand(1, 3, 512, 512) - model = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d) - output, low_level_feat = model(input) - print(output.size()) - print(low_level_feat.size()) diff --git a/models/modeling/backbone/resnet.py b/models/modeling/backbone/resnet.py deleted file mode 100644 index 69de3fc..0000000 --- a/models/modeling/backbone/resnet.py +++ /dev/null @@ -1,191 +0,0 @@ -import math -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.model_zoo as model_zoo -import torch -from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d -import os - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - dilation=dilation, padding=dilation, bias=False) - self.bn2 = BatchNorm(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - -class ResNet(nn.Module): - - def __init__(self, block, layers, output_stride, BatchNorm, pretrained=True, resnet_layers=101): - - self.resnet_layers = 
resnet_layers - self.inplanes = 64 - super(ResNet, self).__init__() - blocks = [1, 2, 4] - if output_stride == 16: - strides = [1, 2, 2, 1] - dilations = [1, 1, 1, 2] - elif output_stride == 8: - strides = [1, 2, 1, 1] - dilations = [1, 1, 2, 4] - else: - raise NotImplementedError - - # Modules - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = BatchNorm(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0], stride=strides[0], dilation=dilations[0], BatchNorm=BatchNorm) - self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[1], dilation=dilations[1], BatchNorm=BatchNorm) - self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[2], dilation=dilations[2], BatchNorm=BatchNorm) - self.layer4 = self._make_MG_unit(block, 512, blocks=blocks, stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm) - # self.layer4 = self._make_layer(block, 512, layers[3], stride=strides[3], dilation=dilations[3], BatchNorm=BatchNorm) - self._init_weight() - - if pretrained: - self._load_pretrained_model() - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, dilation, downsample, BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def _make_MG_unit(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, dilation=blocks[0]*dilation, - downsample=downsample, BatchNorm=BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, len(blocks)): - layers.append(block(self.inplanes, planes, stride=1, - dilation=blocks[i]*dilation, BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def forward(self, input): - x = self.conv1(input) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - x = self.layer1(x) - low_level_feat = x - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - return x, low_level_feat - - def _init_weight(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.SyncBatchNorm): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _load_pretrained_model(self): - # pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth') - if self.resnet_layers == 101: - path = 'pretrained/resnet101-5d3b4d8f.pth' - elif self.resnet_layers == 50: - path = 'pretrained/resnet50-19c8e357.pth' - else: - raise ValueError("{} layers not supported".format(self.resnet_layers)) - - if os.path.exists(path): - pretrain_dict = path - else: - raise ValueError("The path {} not exists".format(path)) - - print("load pretrained weight from {}".format(pretrain_dict)) - pretrain_dict = torch.load(pretrain_dict) - model_dict = {} - state_dict = self.state_dict() - for k, v in pretrain_dict.items(): - if k in state_dict: - model_dict[k] = v - state_dict.update(model_dict) - self.load_state_dict(state_dict) - -def ResNet101(output_stride, BatchNorm, pretrained=True): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 23, 3], output_stride, BatchNorm, pretrained=pretrained) - return model - -def ResNet50(output_stride, BatchNorm, pretrained=True): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 6, 3], output_stride, BatchNorm, pretrained=pretrained, resnet_layers=50) - return model - -if __name__ == "__main__": - import torch - model = ResNet101(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=8) - input = torch.rand(1, 3, 512, 512) - output, low_level_feat = model(input) - print(output.size()) - print(low_level_feat.size()) diff --git a/models/modeling/backbone/resnet_AuxLayers234.py b/models/modeling/backbone/resnet_AuxLayers234.py deleted file mode 100644 index 107d128..0000000 --- a/models/modeling/backbone/resnet_AuxLayers234.py +++ /dev/null @@ -1,217 +0,0 @@ -import math -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.model_zoo as model_zoo -import torch -from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d -import os - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, BatchNorm=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - dilation=dilation, padding=dilation, bias=False) - self.bn2 = BatchNorm(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - self.dilation = dilation - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet_AuxLayers234(nn.Module): - - def __init__(self, block, layers, output_stride, BatchNorm, pretrained=True, resnet_layers=101): - - 
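-        # Note: the *_aux copies of layers 2-4 built below are re-initialized
-        # as deep copies of the main branch once pretrained weights are loaded
-        # (see the copy.deepcopy calls after _load_pretrained_model()).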
self.resnet_layers = resnet_layers - self.inplanes = 64 - super(ResNet_AuxLayers234, self).__init__() - blocks = [1, 2, 4] - if output_stride == 16: - strides = [1, 2, 2, 1] - dilations = [1, 1, 1, 2] - elif output_stride == 8: - strides = [1, 2, 1, 1] - dilations = [1, 1, 2, 4] - else: - raise NotImplementedError - - # Modules - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = BatchNorm(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0], stride=strides[0], dilation=dilations[0], - BatchNorm=BatchNorm) - self.layer2 = self._make_layer(block, 128, layers[1], stride=strides[1], dilation=dilations[1], - BatchNorm=BatchNorm) - self.layer3 = self._make_layer(block, 256, layers[2], stride=strides[2], dilation=dilations[2], - BatchNorm=BatchNorm) - self.layer4 = self._make_MG_unit(block, 512, blocks=blocks, stride=strides[3], dilation=dilations[3], - BatchNorm=BatchNorm) - self.layer2_aux = self._make_layer(block, 128, layers[1], stride=strides[1], dilation=dilations[1], - BatchNorm=BatchNorm) - self.layer3_aux = self._make_layer(block, 256, layers[2], stride=strides[2], dilation=dilations[2], - BatchNorm=BatchNorm) - self.layer4_aux = self._make_MG_unit(block, 512, blocks=blocks, stride=strides[3], dilation=dilations[3], - BatchNorm=BatchNorm) - self._init_weight() - - if pretrained: - self._load_pretrained_model() - import copy - self.layer2_aux = copy.deepcopy(self.layer2) - self.layer3_aux = copy.deepcopy(self.layer3) - self.layer4_aux = copy.deepcopy(self.layer4) - # print(self.layer2[0].conv1._parameters['weight'][:2, 0, :, :]) - # print(self.layer2_aux[0].conv1._parameters['weight'][:2, 0, :, :]) - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, dilation, downsample, BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, dilation=dilation, BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def _make_MG_unit(self, block, planes, blocks, stride=1, dilation=1, BatchNorm=None): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, dilation=blocks[0] * dilation, - downsample=downsample, BatchNorm=BatchNorm)) - self.inplanes = planes * block.expansion - for i in range(1, len(blocks)): - layers.append(block(self.inplanes, planes, stride=1, - dilation=blocks[i] * dilation, BatchNorm=BatchNorm)) - - return nn.Sequential(*layers) - - def forward(self, input): - x = self.conv1(input) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - x = self.layer1(x) - low_level_feat = x - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - # return x, low_level_feat - x_aux = self.layer2_aux(low_level_feat) - x_aux = self.layer3_aux(x_aux) - x_aux = self.layer4_aux(x_aux) - return x, low_level_feat, x_aux - - def _init_weight(self): - for m 
in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, SynchronizedBatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.SyncBatchNorm):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-    def _load_pretrained_model(self):
-        # pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
-        if self.resnet_layers == 101:
-            path = 'pretrained/resnet101-5d3b4d8f.pth'
-        elif self.resnet_layers == 50:
-            path = 'pretrained/resnet50-19c8e357.pth'
-        else:
-            raise ValueError("{} layers not supported".format(self.resnet_layers))
-
-        if os.path.exists(path):
-            pretrain_dict = path
-        else:
-            raise ValueError("The path {} does not exist".format(path))
-
-        print("load pretrained weight from {}".format(pretrain_dict))
-        pretrain_dict = torch.load(pretrain_dict)
-        model_dict = {}
-        state_dict = self.state_dict()
-        for k, v in pretrain_dict.items():
-            if k in state_dict:
-                model_dict[k] = v
-        state_dict.update(model_dict)
-        self.load_state_dict(state_dict)
-
-
-def ResNet101_AuxLayers234(output_stride, BatchNorm, pretrained=True):
-    """Constructs a ResNet-101 model with auxiliary layers 2-4.
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    model = ResNet_AuxLayers234(Bottleneck, [3, 4, 23, 3], output_stride, BatchNorm, pretrained=pretrained)
-    return model
-
-
-def ResNet50_AuxLayers234(output_stride, BatchNorm, pretrained=True):
-    """Constructs a ResNet-50 model with auxiliary layers 2-4.
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    model = ResNet_AuxLayers234(Bottleneck, [3, 4, 6, 3], output_stride, BatchNorm, pretrained=pretrained,
-                                resnet_layers=50)
-    return model
-
-
-if __name__ == "__main__":
-    import torch
-
-    model = ResNet101_AuxLayers234(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=8)
-    input = torch.rand(1, 3, 512, 512)
-    # forward() returns the main output, the low-level features, and the
-    # auxiliary-branch output, so unpack all three values.
-    output, low_level_feat, output_aux = model(input)
-    print(output.size())
-    print(low_level_feat.size())
-    print(output_aux.size())
diff --git a/models/modeling/backbone/xception.py b/models/modeling/backbone/xception.py
deleted file mode 100644
index f0cb255..0000000
--- a/models/modeling/backbone/xception.py
+++ /dev/null
@@ -1,291 +0,0 @@
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.model_zoo as model_zoo
-from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
-
-def fixed_padding(inputs, kernel_size, dilation):
-    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
-    pad_total = kernel_size_effective - 1
-    pad_beg = pad_total // 2
-    pad_end = pad_total - pad_beg
-    padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))
-    return padded_inputs
-
-
-class SeparableConv2d(nn.Module):
-    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False, BatchNorm=None):
-        super(SeparableConv2d, self).__init__()
-
-        self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, 0, dilation,
-                               groups=inplanes, bias=bias)
-        self.bn = BatchNorm(inplanes)
-        self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias)
-
-    def forward(self, x):
-        x = fixed_padding(x, self.conv1.kernel_size[0], dilation=self.conv1.dilation[0])
-        x = self.conv1(x)
-        x = self.bn(x)
-        x = self.pointwise(x)
-        return x
-
-
-class Block(nn.Module):
-    def __init__(self, inplanes, planes, reps, stride=1,
dilation=1, BatchNorm=None, - start_with_relu=True, grow_first=True, is_last=False): - super(Block, self).__init__() - - if planes != inplanes or stride != 1: - self.skip = nn.Conv2d(inplanes, planes, 1, stride=stride, bias=False) - self.skipbn = BatchNorm(planes) - else: - self.skip = None - - self.relu = nn.ReLU(inplace=True) - rep = [] - - filters = inplanes - if grow_first: - rep.append(self.relu) - rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm)) - rep.append(BatchNorm(planes)) - filters = planes - - for i in range(reps - 1): - rep.append(self.relu) - rep.append(SeparableConv2d(filters, filters, 3, 1, dilation, BatchNorm=BatchNorm)) - rep.append(BatchNorm(filters)) - - if not grow_first: - rep.append(self.relu) - rep.append(SeparableConv2d(inplanes, planes, 3, 1, dilation, BatchNorm=BatchNorm)) - rep.append(BatchNorm(planes)) - - if stride != 1: - rep.append(self.relu) - rep.append(SeparableConv2d(planes, planes, 3, 2, BatchNorm=BatchNorm)) - rep.append(BatchNorm(planes)) - - if stride == 1 and is_last: - rep.append(self.relu) - rep.append(SeparableConv2d(planes, planes, 3, 1, BatchNorm=BatchNorm)) - rep.append(BatchNorm(planes)) - - if not start_with_relu: - rep = rep[1:] - - self.rep = nn.Sequential(*rep) - - def forward(self, inp): - x = self.rep(inp) - - if self.skip is not None: - skip = self.skip(inp) - skip = self.skipbn(skip) - else: - skip = inp - - x = x + skip - - return x - - -class AlignedXception(nn.Module): - """ - Modified Alighed Xception - """ - def __init__(self, output_stride, BatchNorm, - pretrained=True): - super(AlignedXception, self).__init__() - - if output_stride == 16: - entry_block3_stride = 2 - middle_block_dilation = 1 - exit_block_dilations = (1, 2) - elif output_stride == 8: - entry_block3_stride = 1 - middle_block_dilation = 2 - exit_block_dilations = (2, 4) - else: - raise NotImplementedError - - - # Entry flow - self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False) - self.bn1 = BatchNorm(32) - self.relu = nn.ReLU(inplace=True) - - self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False) - self.bn2 = BatchNorm(64) - - self.block1 = Block(64, 128, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False) - self.block2 = Block(128, 256, reps=2, stride=2, BatchNorm=BatchNorm, start_with_relu=False, - grow_first=True) - self.block3 = Block(256, 728, reps=2, stride=entry_block3_stride, BatchNorm=BatchNorm, - start_with_relu=True, grow_first=True, is_last=True) - - # Middle flow - self.block4 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block5 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block6 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block7 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block8 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block9 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block10 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block11 = Block(728, 
728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block12 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block13 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block14 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block15 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block16 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block17 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block18 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - self.block19 = Block(728, 728, reps=3, stride=1, dilation=middle_block_dilation, - BatchNorm=BatchNorm, start_with_relu=True, grow_first=True) - - # Exit flow - self.block20 = Block(728, 1024, reps=2, stride=1, dilation=exit_block_dilations[0], - BatchNorm=BatchNorm, start_with_relu=True, grow_first=False, is_last=True) - - self.conv3 = SeparableConv2d(1024, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm) - self.bn3 = BatchNorm(1536) - - self.conv4 = SeparableConv2d(1536, 1536, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm) - self.bn4 = BatchNorm(1536) - - self.conv5 = SeparableConv2d(1536, 2048, 3, stride=1, dilation=exit_block_dilations[1], BatchNorm=BatchNorm) - self.bn5 = BatchNorm(2048) - - # Init weights - self._init_weight() - - # Load pretrained model - if pretrained: - self._load_pretrained_model() - - def forward(self, x): - # Entry flow - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - - x = self.conv2(x) - x = self.bn2(x) - x = self.relu(x) - - x = self.block1(x) - # add relu here - x = self.relu(x) - low_level_feat = x - x = self.block2(x) - x = self.block3(x) - - # Middle flow - x = self.block4(x) - x = self.block5(x) - x = self.block6(x) - x = self.block7(x) - x = self.block8(x) - x = self.block9(x) - x = self.block10(x) - x = self.block11(x) - x = self.block12(x) - x = self.block13(x) - x = self.block14(x) - x = self.block15(x) - x = self.block16(x) - x = self.block17(x) - x = self.block18(x) - x = self.block19(x) - - # Exit flow - x = self.block20(x) - x = self.relu(x) - x = self.conv3(x) - x = self.bn3(x) - x = self.relu(x) - - x = self.conv4(x) - x = self.bn4(x) - x = self.relu(x) - - x = self.conv5(x) - x = self.bn5(x) - x = self.relu(x) - - return x, low_level_feat - - def _init_weight(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, SynchronizedBatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.SyncBatchNorm): - m.weight.data.fill_(1) - m.bias.data.zero_() - - - def _load_pretrained_model(self): - pretrain_dict = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/xception-b5690688.pth') - model_dict = {} - state_dict = self.state_dict() - - for k, v in pretrain_dict.items(): - if k in state_dict: - if 'pointwise' in k: - v = v.unsqueeze(-1).unsqueeze(-1) - if k.startswith('block11'): - model_dict[k] = v - model_dict[k.replace('block11', 'block12')] = v - model_dict[k.replace('block11', 'block13')] = v - model_dict[k.replace('block11', 'block14')] = v - model_dict[k.replace('block11', 'block15')] = v - model_dict[k.replace('block11', 'block16')] = v - model_dict[k.replace('block11', 'block17')] = v - model_dict[k.replace('block11', 'block18')] = v - model_dict[k.replace('block11', 'block19')] = v - elif k.startswith('block12'): - model_dict[k.replace('block12', 'block20')] = v - elif k.startswith('bn3'): - model_dict[k] = v - model_dict[k.replace('bn3', 'bn4')] = v - elif k.startswith('conv4'): - model_dict[k.replace('conv4', 'conv5')] = v - elif k.startswith('bn4'): - model_dict[k.replace('bn4', 'bn5')] = v - else: - model_dict[k] = v - state_dict.update(model_dict) - self.load_state_dict(state_dict) - - - -if __name__ == "__main__": - import torch - model = AlignedXception(BatchNorm=nn.BatchNorm2d, pretrained=True, output_stride=16) - input = torch.rand(1, 3, 512, 512) - output, low_level_feat = model(input) - print(output.size()) - print(low_level_feat.size()) diff --git a/models/modeling/backbones/get_pretrained_model.sh b/models/modeling/backbones/get_pretrained_model.sh deleted file mode 100644 index db6853c..0000000 --- a/models/modeling/backbones/get_pretrained_model.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -FILENAME="models/backbones/pretrained/3x3resnet50-imagenet.pth" - -mkdir -p models/backbones/pretrained -wget https://github.com/yassouali/CCT/releases/download/v0.1/3x3resnet50-imagenet.pth -O $FILENAME diff --git a/models/modeling/backbones/module_helper.py b/models/modeling/backbones/module_helper.py deleted file mode 100644 index 3d61df8..0000000 --- a/models/modeling/backbones/module_helper.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf-8 -*- -# Author: Donny You (youansheng@gmail.com) - - -import os -import torch -import torch.nn as nn -import torch.nn.functional as F - -try: - from urllib import urlretrieve -except ImportError: - from urllib.request import urlretrieve - -class FixedBatchNorm(nn.BatchNorm2d): - def forward(self, input): - return F.batch_norm(input, self.running_mean, self.running_var, self.weight, self.bias, training=False, eps=self.eps) - -class ModuleHelper(object): - - @staticmethod - def BNReLU(num_features, norm_type=None, **kwargs): - if norm_type == 'batchnorm': - return nn.Sequential( - nn.BatchNorm2d(num_features, **kwargs), - nn.ReLU() - ) - elif norm_type == 'encsync_batchnorm': - from encoding.nn import BatchNorm2d - return nn.Sequential( - BatchNorm2d(num_features, **kwargs), - nn.ReLU() - ) - elif norm_type == 'instancenorm': - return nn.Sequential( - nn.InstanceNorm2d(num_features, **kwargs), - nn.ReLU() - ) - elif norm_type == 'fixed_batchnorm': - return nn.Sequential( - FixedBatchNorm(num_features, **kwargs), - nn.ReLU() - ) - else: - raise ValueError('Not support BN type: 
{}.'.format(norm_type)) - - @staticmethod - def BatchNorm3d(norm_type=None, ret_cls=False): - if norm_type == 'batchnorm': - return nn.BatchNorm3d - elif norm_type == 'encsync_batchnorm': - from encoding.nn import BatchNorm3d - return BatchNorm3d - elif norm_type == 'instancenorm': - return nn.InstanceNorm3d - else: - raise ValueError('Not support BN type: {}.'.format(norm_type)) - - @staticmethod - def BatchNorm2d(norm_type=None, ret_cls=False): - if norm_type == 'batchnorm': - return nn.BatchNorm2d - elif norm_type == 'encsync_batchnorm': - from encoding.nn import BatchNorm2d - print("************************encoding syncbn************************") - return BatchNorm2d - elif norm_type == 'lib': - print("************************lib syncbn************************") - from lib.sync_bn.modules import BatchNorm2d - return BatchNorm2d - elif norm_type == 'instancenorm': - return nn.InstanceNorm2d - else: - raise ValueError('Not support BN type: {}.'.format(norm_type)) - - @staticmethod - def BatchNorm1d(norm_type=None, ret_cls=False): - if norm_type == 'batchnorm': - return nn.BatchNorm1d - elif norm_type == 'encsync_batchnorm': - from encoding.nn import BatchNorm1d - return BatchNorm1d - elif norm_type == 'instancenorm': - return nn.InstanceNorm1d - else: - raise ValueError('Not support BN type: {}.'.format(norm_type)) - - @staticmethod - def load_model(model, pretrained=None, all_match=True, map_location='cpu'): - if pretrained is None: - return model - - if not os.path.exists(pretrained): - print('{} not exists.'.format(pretrained)) - return model - - print('Loading pretrained model:{}'.format(pretrained)) - if all_match: - pretrained_dict = torch.load(pretrained, map_location=map_location) - model_dict = model.state_dict() - load_dict = dict() - for k, v in pretrained_dict.items(): - if 'prefix.{}'.format(k) in model_dict: - load_dict['prefix.{}'.format(k)] = v - else: - load_dict[k] = v - model.load_state_dict(load_dict) - - else: - pretrained_dict = torch.load(pretrained) - model_dict = model.state_dict() - load_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} - print('Matched Keys: {}'.format(load_dict.keys())) - model_dict.update(load_dict) - model.load_state_dict(model_dict) - - return model - - @staticmethod - def load_url(url, map_location=None): - model_dir = os.path.join('~', '.TorchCV', 'model') - if not os.path.exists(model_dir): - os.makedirs(model_dir) - - filename = url.split('/')[-1] - cached_file = os.path.join(model_dir, filename) - if not os.path.exists(cached_file): - print('Downloading: "{}" to {}\n'.format(url, cached_file)) - urlretrieve(url, cached_file) - - print('Loading pretrained model:{}'.format(cached_file)) - return torch.load(cached_file, map_location=map_location) - - @staticmethod - def constant_init(module, val, bias=0): - nn.init.constant_(module.weight, val) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - @staticmethod - def xavier_init(module, gain=1, bias=0, distribution='normal'): - assert distribution in ['uniform', 'normal'] - if distribution == 'uniform': - nn.init.xavier_uniform_(module.weight, gain=gain) - else: - nn.init.xavier_normal_(module.weight, gain=gain) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - @staticmethod - def normal_init(module, mean=0, std=1, bias=0): - nn.init.normal_(module.weight, mean, std) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - 
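-    # Note: the init helpers in this class share one convention: set the
-    # weight with the named scheme, then constant-fill the bias when present.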
@staticmethod - def uniform_init(module, a=0, b=1, bias=0): - nn.init.uniform_(module.weight, a, b) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - - @staticmethod - def kaiming_init(module, - mode='fan_in', - nonlinearity='leaky_relu', - bias=0, - distribution='normal'): - assert distribution in ['uniform', 'normal'] - if distribution == 'uniform': - nn.init.kaiming_uniform_( - module.weight, mode=mode, nonlinearity=nonlinearity) - else: - nn.init.kaiming_normal_( - module.weight, mode=mode, nonlinearity=nonlinearity) - if hasattr(module, 'bias') and module.bias is not None: - nn.init.constant_(module.bias, bias) - diff --git a/models/modeling/backbones/resnet_backbone.py b/models/modeling/backbones/resnet_backbone.py deleted file mode 100644 index 8aeaf40..0000000 --- a/models/modeling/backbones/resnet_backbone.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf-8 -*- -# Author: Donny You(youansheng@gmail.com) - - -import torch.nn as nn -from models.backbones.resnet_models import * - - -class NormalResnetBackbone(nn.Module): - def __init__(self, orig_resnet): - super(NormalResnetBackbone, self).__init__() - - self.num_features = 2048 - # take pretrained resnet, except AvgPool and FC - self.prefix = orig_resnet.prefix - self.maxpool = orig_resnet.maxpool - self.layer1 = orig_resnet.layer1 - self.layer2 = orig_resnet.layer2 - self.layer3 = orig_resnet.layer3 - self.layer4 = orig_resnet.layer4 - - def get_num_features(self): - return self.num_features - - def forward(self, x): - tuple_features = list() - x = self.prefix(x) - x = self.maxpool(x) - x = self.layer1(x) - tuple_features.append(x) - x = self.layer2(x) - tuple_features.append(x) - x = self.layer3(x) - tuple_features.append(x) - x = self.layer4(x) - tuple_features.append(x) - - return tuple_features - - -class DilatedResnetBackbone(nn.Module): - def __init__(self, orig_resnet, dilate_scale=8, multi_grid=(1, 2, 4)): - super(DilatedResnetBackbone, self).__init__() - - self.num_features = 2048 - from functools import partial - - if dilate_scale == 8: - orig_resnet.layer3.apply(partial(self._nostride_dilate, dilate=2)) - if multi_grid is None: - orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=4)) - else: - for i, r in enumerate(multi_grid): - orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(4 * r))) - - elif dilate_scale == 16: - if multi_grid is None: - orig_resnet.layer4.apply(partial(self._nostride_dilate, dilate=2)) - else: - for i, r in enumerate(multi_grid): - orig_resnet.layer4[i].apply(partial(self._nostride_dilate, dilate=int(2 * r))) - - # Take pretrained resnet, except AvgPool and FC - self.prefix = orig_resnet.prefix - self.maxpool = orig_resnet.maxpool - self.layer1 = orig_resnet.layer1 - self.layer2 = orig_resnet.layer2 - self.layer3 = orig_resnet.layer3 - self.layer4 = orig_resnet.layer4 - - def _nostride_dilate(self, m, dilate): - classname = m.__class__.__name__ - if classname.find('Conv') != -1: - # the convolution with stride - if m.stride == (2, 2): - m.stride = (1, 1) - if m.kernel_size == (3, 3): - m.dilation = (dilate // 2, dilate // 2) - m.padding = (dilate // 2, dilate // 2) - # other convoluions - else: - if m.kernel_size == (3, 3): - m.dilation = (dilate, dilate) - m.padding = (dilate, dilate) - - def get_num_features(self): - return self.num_features - - def forward(self, x): - tuple_features = list() - - # print("1: {}".format(x.size())) #[320] - - x = self.prefix(x) - - # print("2: 
{}".format(x.size())) #160 - - x = self.maxpool(x) - - # print("3: {}".format(x.size())) #80 - - x = self.layer1(x) - - # print("4: {}".format(x.size())) #80 - - tuple_features.append(x) - x = self.layer2(x) - - # print("5: {}".format(x.size())) #40 - - tuple_features.append(x) - x = self.layer3(x) - - # print("6: {}".format(x.size())) #40 - - tuple_features.append(x) - x = self.layer4(x) - - # print("7: {}".format(x.size())) #40 - # input() - - tuple_features.append(x) - - return tuple_features - - -def ResNetBackbone(backbone=None, pretrained=None, multi_grid=None, norm_type='batchnorm'): - arch = backbone - if arch == 'resnet34': - orig_resnet = resnet34(pretrained=pretrained) - arch_net = NormalResnetBackbone(orig_resnet) - arch_net.num_features = 512 - - elif arch == 'resnet34_dilated8': - orig_resnet = resnet34(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) - arch_net.num_features = 512 - - elif arch == 'resnet34_dilated16': - orig_resnet = resnet34(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) - arch_net.num_features = 512 - - elif arch == 'resnet50': - orig_resnet = resnet50(pretrained=pretrained) - arch_net = NormalResnetBackbone(orig_resnet) - - elif arch == 'resnet50_dilated8': - orig_resnet = resnet50(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) - - elif arch == 'resnet50_dilated16': - orig_resnet = resnet50(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) - - elif arch == 'deepbase_resnet50': - if pretrained: - pretrained = 'models/backbones/pretrained/3x3resnet50-imagenet.pth' - orig_resnet = deepbase_resnet50(pretrained=pretrained) - arch_net = NormalResnetBackbone(orig_resnet) - - elif arch == 'deepbase_resnet50_dilated8': - if pretrained: - pretrained = 'models/backbones/pretrained/3x3resnet50-imagenet.pth' - orig_resnet = deepbase_resnet50(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) - - elif arch == 'deepbase_resnet50_dilated16': - orig_resnet = deepbase_resnet50(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) - - elif arch == 'resnet101': - orig_resnet = resnet101(pretrained=pretrained) - arch_net = NormalResnetBackbone(orig_resnet) - - elif arch == 'resnet101_dilated8': - orig_resnet = resnet101(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) - - elif arch == 'resnet101_dilated16': - orig_resnet = resnet101(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) - - elif arch == 'deepbase_resnet101': - orig_resnet = deepbase_resnet101(pretrained=pretrained) - arch_net = NormalResnetBackbone(orig_resnet) - - elif arch == 'deepbase_resnet101_dilated8': - if pretrained: - pretrained = 'models/backbones/pretrained/3x3resnet101-imagenet.pth' - orig_resnet = deepbase_resnet101(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=8, multi_grid=multi_grid) - - elif arch == 'deepbase_resnet101_dilated16': - orig_resnet = deepbase_resnet101(pretrained=pretrained) - arch_net = DilatedResnetBackbone(orig_resnet, dilate_scale=16, multi_grid=multi_grid) - - else: - raise Exception('Architecture undefined!') - - return arch_net diff --git 
deleted file mode 100644
index 302d24b..0000000
--- a/models/modeling/backbones/resnet_models.py
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/usr/bin/env python
-# -*- coding:utf-8 -*-
-# Author: Donny You(youansheng@gmail.com)
-
-
-import math
-import torch.nn as nn
-from collections import OrderedDict
-
-from models.backbones.module_helper import ModuleHelper
-
-
-model_urls = {
-    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
-    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
-    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
-    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
-    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
-}
-
-
-def conv3x3(in_planes, out_planes, stride=1):
-    "3x3 convolution with padding"
-    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
-                     padding=1, bias=False)
-
-
-class BasicBlock(nn.Module):
-    expansion = 1
-
-    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_type=None):
-        super(BasicBlock, self).__init__()
-        self.conv1 = conv3x3(inplanes, planes, stride)
-        self.bn1 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes)
-        self.relu = nn.ReLU(inplace=True)
-        self.conv2 = conv3x3(planes, planes)
-        self.bn2 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes)
-        self.downsample = downsample
-        self.stride = stride
-
-    def forward(self, x):
-        residual = x
-
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.bn2(out)
-
-        if self.downsample is not None:
-            residual = self.downsample(x)
-
-        out += residual
-        out = self.relu(out)
-
-        return out
-
-
-class Bottleneck(nn.Module):
-    expansion = 4
-
-    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_type=None):
-        super(Bottleneck, self).__init__()
-        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
-        self.bn1 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes)
-        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
-                               padding=1, bias=False)
-        self.bn2 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes)
-        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
-        self.bn3 = ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes * 4)
-        self.relu = nn.ReLU(inplace=True)
-        self.downsample = downsample
-        self.stride = stride
-
-    def forward(self, x):
-        residual = x
-
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-
-        out = self.conv2(out)
-        out = self.bn2(out)
-        out = self.relu(out)
-
-        out = self.conv3(out)
-        out = self.bn3(out)
-
-        if self.downsample is not None:
-            residual = self.downsample(x)
-
-        out += residual
-        out = self.relu(out)
-
-        return out
-
-
-class ResNet(nn.Module):
-
-    def __init__(self, block, layers, num_classes=1000, deep_base=False, norm_type=None):
-        super(ResNet, self).__init__()
-        self.inplanes = 128 if deep_base else 64
-        if deep_base:
-            self.prefix = nn.Sequential(OrderedDict([
-                ('conv1', nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)),
-                ('bn1', ModuleHelper.BatchNorm2d(norm_type=norm_type)(64)),
-                ('relu1', nn.ReLU(inplace=False)),
-                ('conv2', nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)),
-                ('bn2', ModuleHelper.BatchNorm2d(norm_type=norm_type)(64)),
-                ('relu2', nn.ReLU(inplace=False)),
-                ('conv3', nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)),
-                ('bn3', ModuleHelper.BatchNorm2d(norm_type=norm_type)(self.inplanes)),
-                ('relu3', nn.ReLU(inplace=False))]
-            ))
-        else:
-            self.prefix = nn.Sequential(OrderedDict([
-                ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)),
-                ('bn1', ModuleHelper.BatchNorm2d(norm_type=norm_type)(self.inplanes)),
-                ('relu', nn.ReLU(inplace=False))]
-            ))
-
-        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)  # change.
-
-        self.layer1 = self._make_layer(block, 64, layers[0], norm_type=norm_type)
-        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_type=norm_type)
-        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_type=norm_type)
-        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_type=norm_type)
-        self.avgpool = nn.AvgPool2d(7, stride=1)
-        self.fc = nn.Linear(512 * block.expansion, num_classes)
-
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, ModuleHelper.BatchNorm2d(norm_type=norm_type, ret_cls=True)):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-    def _make_layer(self, block, planes, blocks, stride=1, norm_type=None):
-        downsample = None
-        if stride != 1 or self.inplanes != planes * block.expansion:
-            downsample = nn.Sequential(
-                nn.Conv2d(self.inplanes, planes * block.expansion,
-                          kernel_size=1, stride=stride, bias=False),
-                ModuleHelper.BatchNorm2d(norm_type=norm_type)(planes * block.expansion),
-            )
-
-        layers = []
-        layers.append(block(self.inplanes, planes, stride, downsample, norm_type=norm_type))
-        self.inplanes = planes * block.expansion
-        for i in range(1, blocks):
-            layers.append(block(self.inplanes, planes, norm_type=norm_type))
-
-        return nn.Sequential(*layers)
-
-    def forward(self, x):
-        x = self.prefix(x)
-        x = self.maxpool(x)
-
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-
-        x = self.avgpool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-
-        return x
-
-
-def resnet18(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-18 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-        norm_type (str): choose norm type
-    """
-    model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, deep_base=False, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def deepbase_resnet18(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-18 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, deep_base=True, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def resnet34(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-34 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, deep_base=False, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def deepbase_resnet34(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-34 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, deep_base=True, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def resnet50(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-# def resnet50(num_classes=1000, pretrained=None, norm_type='encsync_batchnorm', **kwargs):
-    """Constructs a ResNet-50 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    # print("entered")
-    # input()
-    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, deep_base=False, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def deepbase_resnet50(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-# def deepbase_resnet50(num_classes=1000, pretrained=None, norm_type='encsync_batchnorm', **kwargs):
-# def deepbase_resnet50(num_classes=1000, pretrained=None, norm_type='lib', **kwargs):
-    """Constructs a ResNet-50 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, deep_base=True, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def resnet101(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-101 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, deep_base=False, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def deepbase_resnet101(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-101 model.
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, deep_base=True, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def resnet152(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-152 model.
-
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, deep_base=False, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
-
-def deepbase_resnet152(num_classes=1000, pretrained=None, norm_type='batchnorm', **kwargs):
-    """Constructs a ResNet-152 model.
-
-    Args:
-        pretrained (str, optional): path to a pre-trained checkpoint to load
-    """
-    model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, deep_base=True, norm_type=norm_type)
-    model = ModuleHelper.load_model(model, pretrained=pretrained)
-    return model
diff --git a/models/modeling/decoder.py b/models/modeling/decoder.py
deleted file mode 100644
index 923a174..0000000
--- a/models/modeling/decoder.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
-class Decoder(nn.Module):
-    def __init__(self, num_classes, backbone, BatchNorm):
-        super(Decoder, self).__init__()
-        if backbone == 'resnet101' or backbone == 'resnet50' or backbone == 'drn':
-            low_level_inplanes = 256
-        elif backbone == 'xception':
-            low_level_inplanes = 128
-        elif backbone == 'mobilenet':
-            low_level_inplanes = 24
-        else:
-            raise NotImplementedError
-
-        self.conv1 = nn.Conv2d(low_level_inplanes, 48, 1, bias=False)
-        self.bn1 = BatchNorm(48)
-        self.relu = nn.ReLU()
-        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),
-                                       BatchNorm(256),
-                                       nn.ReLU(),
-                                       nn.Dropout(0.5),
-                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
-                                       BatchNorm(256),
-                                       nn.ReLU(),
-                                       # nn.Dropout(0.1),
-                                       # nn.Conv2d(256, num_classes, kernel_size=1, stride=1)
-                                       )
-        self._init_weight()
-
-    def forward(self, x, low_level_feat):
-        low_level_feat = self.conv1(low_level_feat)
-        low_level_feat = self.bn1(low_level_feat)
-        low_level_feat = self.relu(low_level_feat)
-
-        x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
-        x = torch.cat((x, low_level_feat), dim=1)
-        x = self.last_conv(x)
-
-        return x
-
-    def _init_weight(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                torch.nn.init.kaiming_normal_(m.weight)
-            elif isinstance(m, SynchronizedBatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-            elif isinstance(m, nn.SyncBatchNorm):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-def build_decoder(num_classes, backbone, BatchNorm):
-    return Decoder(num_classes, backbone, BatchNorm)
\ No newline at end of file
diff --git a/models/modeling/deeplab.py b/models/modeling/deeplab.py
deleted file mode 100644
index f9045fe..0000000
--- a/models/modeling/deeplab.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from models.modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d
-from models.modeling.aspp import build_aspp
-from models.modeling.decoder import build_decoder
-from models.modeling.backbone import build_backbone
-
-class DeepLab(nn.Module):
-    def __init__(self, backbone='resnet101', output_stride=16, num_classes=21,
-                 sync_bn=False, freeze_bn=False):
-        super(DeepLab, self).__init__()
-        if backbone == 'drn':
-            output_stride = 8
-
-        assert sync_bn == False
-        assert freeze_bn == False
-
-        if sync_bn == True:
-            BatchNorm = SynchronizedBatchNorm2d
-        else:
-            BatchNorm = nn.BatchNorm2d
-
-        self.backbone = build_backbone(backbone, output_stride, BatchNorm)
-        self.aspp = build_aspp(backbone, output_stride, BatchNorm)
-        self.decoder = build_decoder(num_classes, backbone, BatchNorm)
-
-        self.freeze_bn = freeze_bn  # note: this attribute shadows the freeze_bn() method below
-
-    def forward(self, input):
-        x, low_level_feat = self.backbone(input)
-        x = self.aspp(x)
-        x = self.decoder(x, low_level_feat)
-        # x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
-        return x
-
-    def freeze_bn(self):
-        for m in self.modules():
-            if isinstance(m, SynchronizedBatchNorm2d):
-                m.eval()
-            elif isinstance(m, nn.BatchNorm2d):
-                m.eval()
-
-    def get_backbone_params(self):
-        modules = [self.backbone]
-        for i in range(len(modules)):
-            for m in modules[i].named_modules():
-                if self.freeze_bn:
-                    if isinstance(m[1], nn.Conv2d):
-                        for p in m[1].parameters():
-                            if p.requires_grad:
-                                yield p
-                else:
-                    if isinstance(m[1], nn.Conv2d) or isinstance(m[1], SynchronizedBatchNorm2d) \
-                            or isinstance(m[1], nn.BatchNorm2d) or isinstance(m[1], nn.SyncBatchNorm):
-                        for p in m[1].parameters():
-                            if p.requires_grad:
-                                yield p
-
-    def get_module_params(self):
-        modules = [self.aspp, self.decoder]
-        for i in range(len(modules)):
-            for m in modules[i].named_modules():
-                if self.freeze_bn:
-                    if isinstance(m[1], nn.Conv2d):
-                        for p in m[1].parameters():
-                            if p.requires_grad:
-                                yield p
-                else:
-                    if isinstance(m[1], nn.Conv2d) or isinstance(m[1], SynchronizedBatchNorm2d) \
-                            or isinstance(m[1], nn.BatchNorm2d) or isinstance(m[1], nn.SyncBatchNorm):
-                        for p in m[1].parameters():
-                            if p.requires_grad:
-                                yield p
-
-if __name__ == "__main__":
-    model = DeepLab(backbone='mobilenet', output_stride=16)
-    model.eval()
-    input = torch.rand(1, 3, 513, 513)
-    output = model(input)
-    print(output.size())
-
-