diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..cc67606 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.linting.pylintEnabled": true, + "python.linting.enabled": true +} \ No newline at end of file diff --git a/Experiment-AGW-market.sh b/Experiment-AGW-market.sh index dcb3304..4f0a6e6 100644 --- a/Experiment-AGW-market.sh +++ b/Experiment-AGW-market.sh @@ -8,5 +8,5 @@ # weight regularized triplet loss # generalized mean pooling # non local blocks -python3 tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('3')" \ -DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/Experiment-AGW-baseline')" \ No newline at end of file +python3 tools/main.py --config_file='configs/AGW_baseline.yml' MODEL.DEVICE_ID "('0')" \ +DATASETS.NAMES "('market1501')" OUTPUT_DIR "('./log/market1501/cosface-1')" \ No newline at end of file diff --git a/config/defaults.py b/config/defaults.py index a1f65f1..2ce0864 100644 --- a/config/defaults.py +++ b/config/defaults.py @@ -22,7 +22,7 @@ # ID number of GPU _C.MODEL.DEVICE_ID = '0' # Name of backbone -_C.MODEL.NAME = 'resnet50' +_C.MODEL.BACKBONE = 'resnet50' # Last stride of backbone _C.MODEL.LAST_STRIDE = 1 # Path to pretrained model of backbone @@ -36,9 +36,14 @@ _C.MODEL.CENTER_FEAT_DIM = 2048 # If train with weighted regularized triplet loss, options: 'on', 'off' _C.MODEL.WEIGHT_REGULARIZED_TRIPLET = 'off' -# If train with generalized mean pooling, options: 'on', 'off' -_C.MODEL.GENERALIZED_MEAN_POOL = 'off' - +# custom config +_C.MODEL.POOL_TYPE = 'avg' +_C.MODEL.COSINE_LOSS_TYPE = '' +_C.MODEL.SCALING_FACTOR = 60.0 +_C.MODEL.MARGIN = 0.35 +_C.MODEL.USE_BNBIAS = False +_C.MODEL.USE_DROPOUT = True +_C.MODEL.USE_SESTN = False # ----------------------------------------------------------------------------- # INPUT diff --git a/configs/AGW_baseline.yml b/configs/AGW_baseline.yml index c089638..7f46037 100644 --- a/configs/AGW_baseline.yml +++ b/configs/AGW_baseline.yml @@ -1,13 +1,19 @@ MODEL: - PRETRAIN_CHOICE: 'self' - PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/Experiment-AGW-baseline/resnet50_nl_model_160.pth" - # PRETRAIN_PATH: 'C:/Users/60310249/.torch/models/resnet50-19c8e357.pth' + PRETRAIN_CHOICE: 'imagenet' + # PRETRAIN_PATH: "/content/drive/Shared drives/REID/ReId-Survey-Paper/ReID-Survey/log/market1501/local-AGW-baseline/resnet50_nl_model_120.pth" + PRETRAIN_PATH: '/home/thanit456/torch/weights/resnet50-19c8e357.pth' TRANSFER_MODE : 'off' CENTER_LOSS: 'on' CENTER_FEAT_DIM: 2048 - NAME: 'resnet50_nl' + BACKBONE: 'resnet50_nl' WEIGHT_REGULARIZED_TRIPLET: 'on' - GENERALIZED_MEAN_POOL: 'on' + POOL_TYPE: 'avg' + COSINE_LOSS_TYPE: 'CosFace' + SCALING_FACTOR: 60.0 + MARGIN: 0.35 + USE_BNBIAS: False + USE_DROPOUT: False + USE_SESTN: False INPUT: IMG_SIZE: [256, 128] @@ -32,6 +38,8 @@ SOLVER: WEIGHT_DECAY: 0.0005 IMS_PER_BATCH: 64 + # IMS_PER_BATCH: 32 + STEPS: [40, 70] GAMMA: 0.1 diff --git a/evaluate/__init__.py b/evaluate/__init__.py new file mode 100644 index 0000000..f735821 --- /dev/null +++ b/evaluate/__init__.py @@ -0,0 +1,12 @@ +import torch + +from .eval_reid import eval_func + +def euclidean_dist(x, y): + m, n = x.size(0), y.size(0) + xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) + yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() + dist = xx + yy + dist.addmm_(1, -2, x, y.t()) + dist = dist.clamp(min=1e-12).sqrt() + return dist diff --git a/evaluate/eval_reid.py b/evaluate/eval_reid.py 
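The `euclidean_dist` helper added to `evaluate/__init__.py` computes all pairwise distances through the expansion ||x − y||² = ||x||² + ||y||² − 2x·y. As a side note, `dist.addmm_(1, -2, x, y.t())` is the old positional signature; the sketch below is illustrative only (it is not part of the patch, and assumes a PyTorch version where the positional form is deprecated) and cross-checks the result against `torch.cdist`.

import torch

def euclidean_dist_check(x, y):
    # same expansion as evaluate.euclidean_dist, written with the keyword form of addmm_
    m, n = x.size(0), y.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    dist.addmm_(x, y.t(), beta=1, alpha=-2)   # dist = 1*dist + (-2) * x @ y.t()
    return dist.clamp(min=1e-12).sqrt()

q, g = torch.randn(4, 2048), torch.randn(7, 2048)
assert torch.allclose(euclidean_dist_check(q, g), torch.cdist(q, g), atol=1e-4)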
new file mode 100644 index 0000000..682d689 --- /dev/null +++ b/evaluate/eval_reid.py @@ -0,0 +1,65 @@ +import numpy as np + +def eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=200): + """Evaluation with market1501 metric + Key: for each query identity, its gallery images from the same camera view are discarded. + """ + num_q, num_g = distmat.shape + if num_g < max_rank: + max_rank = num_g + print("Note: number of gallery samples is quite small, got {}".format(num_g)) + indices = np.argsort(distmat, axis=1) + matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) + + # compute cmc curve for each query + all_cmc = [] + all_AP = [] + num_valid_q = 0. # number of valid query + for q_idx in range(num_q): + # get query pid and camid + q_pid = q_pids[q_idx] + q_camid = q_camids[q_idx] + + # remove gallery samples that have the same pid and camid with query + order = indices[q_idx] + remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) + keep = np.invert(remove) + + # compute cmc curve + # binary vector, positions with value 1 are correct matches + orig_cmc = matches[q_idx][keep] + if not np.any(orig_cmc): + # this condition is true when query identity does not appear in gallery + # [update:20191029] divide by query + all_AP.append(0) + continue + + cmc = orig_cmc.cumsum() + cmc[cmc > 1] = 1 + + all_cmc.append(cmc[:max_rank]) + num_valid_q += 1. + + # compute average precision + # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision + # [update:20191029] support for map@max_rank + orig_cmc = orig_cmc[:max_rank] + if not np.any(orig_cmc): + all_AP.append(0) + continue + num_rel = orig_cmc.sum() + tmp_cmc = orig_cmc.cumsum() + tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)] + tmp_cmc = np.asarray(tmp_cmc) * orig_cmc + AP = tmp_cmc.sum() / num_rel + all_AP.append(AP) + + # assert num_valid_q > 0, "Error: all query identities do not appear in gallery" + + all_cmc = np.asarray(all_cmc).astype(np.float32) + # [update:20191029] divide by query + all_cmc = all_cmc.sum(0) / num_q + + mAP = np.mean(all_AP) + + return all_cmc, mAP,all_AP \ No newline at end of file diff --git a/modeling/__init__.py b/modeling/__init__.py index 0bfc57d..c9a13f4 100644 --- a/modeling/__init__.py +++ b/modeling/__init__.py @@ -3,6 +3,16 @@ from .baseline import Baseline def build_model(cfg, num_classes): - model = Baseline(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.PRETRAIN_PATH, cfg.MODEL.NAME, - cfg.MODEL.GENERALIZED_MEAN_POOL, cfg.MODEL.PRETRAIN_CHOICE) + model = Baseline(num_classes=num_classes, + last_stride=cfg.MODEL.LAST_STRIDE, + model_path=cfg.MODEL.PRETRAIN_PATH, + backbone=cfg.MODEL.BACKBONE, + pool_type=cfg.MODEL.POOL_TYPE, + use_dropout=cfg.MODEL.USE_DROPOUT, + cosine_loss_type=cfg.MODEL.COSINE_LOSS_TYPE, + s=cfg.MODEL.SCALING_FACTOR, + m=cfg.MODEL.MARGIN, + use_bnbias=cfg.MODEL.USE_BNBIAS, + use_sestn=cfg.MODEL.USE_SESTN, + pretrain_choice=cfg.MODEL.PRETRAIN_CHOICE) return model \ No newline at end of file diff --git a/modeling/backbones/resnet_ibn_a.py b/modeling/backbones/resnet_ibn_a.py index d65cd54..41f60b9 100644 --- a/modeling/backbones/resnet_ibn_a.py +++ b/modeling/backbones/resnet_ibn_a.py @@ -3,6 +3,7 @@ import math import torch.utils.model_zoo as model_zoo +from ..layer.attention import SESTNLayer __all__ = ['ResNet_IBN', 'resnet50_ibn_a', 'resnet101_ibn_a', 'resnet152_ibn_a'] @@ -76,7 +77,7 @@ def forward(self, x): class ResNet_IBN(nn.Module): - def __init__(self, last_stride, block, layers, 
num_classes=1000): + def __init__(self, last_stride, block, layers, num_classes=1000, use_sestn=False): scale = 64 self.inplanes = scale super(ResNet_IBN, self).__init__() @@ -92,6 +93,11 @@ def __init__(self, last_stride, block, layers, num_classes=1000): self.avgpool = nn.AvgPool2d(7) self.fc = nn.Linear(scale * 8 * block.expansion, num_classes) + self.use_sestn = use_sestn + if use_sestn: + self.sestn1 = SESTNLayer(256, 16) + self.sestn2 = SESTNLayer(512, 32) + for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels @@ -130,7 +136,15 @@ def forward(self, x): x = self.maxpool(x) x = self.layer1(x) + + if self.use_sestn: + x = self.sestn1(x) + x = self.layer2(x) + + if self.use_sestn: + x = self.sestn2(x) + x = self.layer3(x) x = self.layer4(x) diff --git a/modeling/baseline.py b/modeling/baseline.py index 8a42625..3e98e13 100644 --- a/modeling/baseline.py +++ b/modeling/baseline.py @@ -8,8 +8,8 @@ from .backbones.senet import SENet, SEResNetBottleneck, SEBottleneck, SEResNeXtBottleneck from .backbones.resnet_ibn_a import resnet50_ibn_a from .backbones.resnet_nl import ResNetNL -from .layer import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet, CenterLoss, GeneralizedMeanPooling, GeneralizedMeanPoolingP - +from .layer import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet, CenterLoss, GeM +from .layer.cosine_loss import AdaCos, CosFace, ArcFace def weights_init_kaiming(m): classname = m.__class__.__name__ @@ -26,7 +26,6 @@ def weights_init_kaiming(m): nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0.0) - def weights_init_classifier(m): classname = m.__class__.__name__ if classname.find('Linear') != -1: @@ -38,27 +37,40 @@ def weights_init_classifier(m): class Baseline(nn.Module): in_planes = 2048 - def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, pretrain_choice): + def __init__(self, + num_classes, + last_stride, + model_path, + backbone="resnet50", + pool_type="avg", + use_dropout=True, + cosine_loss_type='', + s=30.0, + m=0.35, + use_bnbias=False, + use_sestn=False, + pretrain_choice=None, + training=True): super(Baseline, self).__init__() - if model_name == 'resnet50': + if backbone == 'resnet50': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3]) - elif model_name == 'resnet50_nl': + elif backbone == 'resnet50_nl': self.base = ResNetNL(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 6, 3], non_layers=[0, 2, 3, 0]) - elif model_name == 'resnet101': + elif backbone == 'resnet101': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 4, 23, 3]) - elif model_name == 'resnet152': + elif backbone == 'resnet152': self.base = ResNet(last_stride=last_stride, block=Bottleneck, layers=[3, 8, 36, 3]) - elif model_name == 'se_resnet50': + elif backbone == 'se_resnet50': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 6, 3], groups=1, @@ -69,7 +81,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnet101': + elif backbone == 'se_resnet101': self.base = SENet(block=SEResNetBottleneck, layers=[3, 4, 23, 3], groups=1, @@ -80,7 +92,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnet152': + elif backbone == 'se_resnet152': self.base 
= SENet(block=SEResNetBottleneck, layers=[3, 8, 36, 3], groups=1, @@ -91,7 +103,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnext50': + elif backbone == 'se_resnext50': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32, @@ -102,7 +114,7 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'se_resnext101': + elif backbone == 'se_resnext101': self.base = SENet(block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], groups=32, @@ -113,51 +125,93 @@ def __init__(self, num_classes, last_stride, model_path, model_name, gem_pool, p downsample_kernel_size=1, downsample_padding=0, last_stride=last_stride) - elif model_name == 'senet154': + elif backbone == 'senet154': self.base = SENet(block=SEBottleneck, layers=[3, 8, 36, 3], groups=64, reduction=16, dropout_p=0.2, last_stride=last_stride) - elif model_name == 'resnet50_ibn_a': - self.base = resnet50_ibn_a(last_stride) - # elif model_name == 'efficientnet' : - # self.base = - + elif backbone == 'resnet50_ibn_a': + self.base = resnet50_ibn_a(last_stride, use_sestn=use_sestn) + if pretrain_choice == 'imagenet': self.base.load_param(model_path) print('Loading pretrained ImageNet model......') self.num_classes = num_classes + in_features = self.in_planes - if gem_pool == 'on': - print("Generalized Mean Pooling") - self.global_pool = GeneralizedMeanPoolingP() + if pool_type == "avg": + self.gap = nn.AdaptiveAvgPool2d(1) + elif "gem" in pool_type: + if pool_type != "gem": + p = pool_type.split()[-1] + p = float(p) + self.gap = GeM(p=p, eps=1e-6, freeze_p=True) + else: + self.gap = GeM(eps=1e-6, freeze_p=False) + elif pool_type == "max": + self.gap = nn.AdaptiveMaxPool2d(1) + elif "Att" in pool_type: + self.gap = eval(pool_type)(in_features = in_features) + in_features = self.gap.out_features(in_features) else: - print("Global Adaptive Pooling") - self.global_pool = nn.AdaptiveAvgPool2d(1) - - self.bottleneck = nn.BatchNorm1d(self.in_planes) - self.bottleneck.bias.requires_grad_(False) # no shift - self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) - + self.gap = eval(pool_type) + in_features = self.gap.out_features(in_features) + + # ? 
legacy code + # if gem_pool: + # print("Generalized Mean Pooling") + # self.global_pool = GeneralizedMeanPoolingP() + # else: + # print("Global Adaptive Pooling") + # self.global_pool = nn.AdaptiveAvgPool2d(1) + + # bnneck + self.bottleneck = nn.BatchNorm1d(in_features) + if not use_bnbias: + self.bottleneck.bias.requires_grad = False + print("==> remove bnneck bias") + else: + print("==> using bnneck bias") self.bottleneck.apply(weights_init_kaiming) - self.classifier.apply(weights_init_classifier) - - def forward(self, x): - x = self.base(x) + + if cosine_loss_type == '': + self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) + self.classifier.apply(weights_init_classifier) + else: + if cosine_loss_type == 'AdaCos': + self.classifier = eval(cosine_loss_type)(in_features, self.num_classes, m) + # CosFace + else: + self.classifier = eval(cosine_loss_type)(in_features, self.num_classes, s, m) + self.cosine_loss_type = cosine_loss_type + self.use_dropout = use_dropout - global_feat = self.global_pool(x) # (b, 2048, 1, 1) + def forward(self, x, label=None): + global_feat = self.gap(self.base(x)) # (b, 2048, 1, 1) global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) - feat = self.bottleneck(global_feat) # normalize for angular softmax - if not self.training: + if self.training: + if self.use_dropout: + feat = self.gap(self.base(x)) + if self.cosine_loss_type == '': + cls_score = self.classifier(feat) + else: + # assert label is not None + cls_score = self.classifier(feat, label) + return cls_score, global_feat # global feature for triplet loss + else: return feat - cls_score = self.classifier(feat) - return cls_score, global_feat + # ? legacy code + # if not self.training: + # return feat + + # cls_score = self.classifier(feat) + # return cls_score, global_feat def load_param(self, trained_path): param_dict = torch.load(trained_path) @@ -207,5 +261,4 @@ def criterion_total(score, feat, target): criterion['total'] = criterion_total - return criterion - + return criterion \ No newline at end of file diff --git a/modeling/layer/__init__.py b/modeling/layer/__init__.py index 8635ca9..43b0579 100644 --- a/modeling/layer/__init__.py +++ b/modeling/layer/__init__.py @@ -3,4 +3,4 @@ from .center_loss import CenterLoss from .triplet_loss import CrossEntropyLabelSmooth, TripletLoss, WeightedRegularizedTriplet from .non_local import Non_local -from .gem_pool import GeneralizedMeanPooling, GeneralizedMeanPoolingP \ No newline at end of file +from .pooling import GeM \ No newline at end of file diff --git a/modeling/layer/attention.py b/modeling/layer/attention.py new file mode 100644 index 0000000..8fa37af --- /dev/null +++ b/modeling/layer/attention.py @@ -0,0 +1,98 @@ +import torch +from torch import nn + +class SELayer(nn.Module): + def __init__(self, channel, reduction=64, multiply=True): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), + nn.Sigmoid() + ) + self.multiply = multiply + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + if self.multiply == True: + return x * y + else: + return y + + +class STNLayer(nn.Module): + def __init__(self, channel_in, multiply=True): + super(STNLayer, self).__init__() + c = channel_in + C = c//32 + self.multiply = multiply + self.conv_in = nn.Conv2d(c, C, kernel_size=1) + self.conv_out = 
nn.Conv2d(C, 1, kernel_size=1)
+        # Encoder
+        self.conv1 = nn.Conv2d(C, 2*C, kernel_size=3)
+        self.bn1 = nn.BatchNorm2d(2*C)
+        self.ReLU1 = nn.ReLU(True)
+        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)
+        self.conv2 = nn.Conv2d(2*C, 4*C, kernel_size=3)
+        self.bn2 = nn.BatchNorm2d(4*C)
+        self.ReLU2 = nn.ReLU(True)
+
+        # Decoder
+        self.deconv1 = nn.ConvTranspose2d(4*C, 2*C, kernel_size=3)
+        self.bn3 = nn.BatchNorm2d(2*C)
+        self.ReLU3 = nn.ReLU(True)
+        self.unpool1 = nn.MaxUnpool2d(kernel_size=2)
+        self.deconv2 = nn.ConvTranspose2d(2*C, C, kernel_size=3)
+        self.bn4 = nn.BatchNorm2d(C)
+        self.ReLU4 = nn.ReLU(True)
+
+    def forward(self, x):
+        b, c, _, _ = x.size()
+        # print("modules: x.shape: " + str(x.shape))
+        y = self.conv_in(x)
+
+        # Encode
+        y = self.conv1(y)
+        y = self.bn1(y)
+        y = self.ReLU1(y)
+        size1 = y.size()
+        y, indices1 = self.pool1(y)
+        y = self.conv2(y)
+        y = self.bn2(y)
+        y = self.ReLU2(y)
+
+        # Decode
+        y = self.deconv1(y)
+        y = self.bn3(y)
+        y = self.ReLU3(y)
+        y = self.unpool1(y, indices1, size1)
+        y = self.deconv2(y)
+        y = self.bn4(y)
+        y = self.ReLU4(y)
+
+        y = self.conv_out(y)
+        # torch.save(y, './STN_stage1.pkl')
+        if self.multiply:
+            return x * y
+        else:
+            return y
+
+
+class SESTNLayer(nn.Module):
+    def __init__(self, channel_in, r):
+        super(SESTNLayer, self).__init__()
+        c = channel_in
+        self.se = SELayer(channel=c, reduction=r, multiply=False)
+        self.stn = STNLayer(channel_in=c, multiply=False)
+        self.activation = nn.ReLU(True)
+
+    def forward(self, x):
+        y = self.se(x)
+        z = self.stn(x)
+        a = self.activation(y + z)  # Final joint attention map
+        return x + x * a
\ No newline at end of file
diff --git a/modeling/layer/cosine_loss.py b/modeling/layer/cosine_loss.py
new file mode 100644
index 0000000..85ba2b7
--- /dev/null
+++ b/modeling/layer/cosine_loss.py
@@ -0,0 +1,140 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import Parameter
+import math
+
+class ArcFace(nn.Module):
+    def __init__(self, num_features, num_classes, s=30.0, m=0.50):
+        super(ArcFace, self).__init__()
+        self.num_features = num_features
+        self.num_classes = num_classes
+        self.s = s
+        self.m = m
+        self.W = Parameter(torch.FloatTensor(num_classes, num_features))
+        nn.init.xavier_uniform_(self.W)
+
+    def forward(self, input, label=None):
+        # normalized features
+        x = F.normalize(input)
+        # normalized weights
+        W = F.normalize(self.W)
+        # cosine logits
+        logits = F.linear(x, W)
+        if label is None:
+            return logits
+        # add the angular margin m to the target-class angle only
+        theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7))
+        target_logits = torch.cos(theta + self.m)
+        one_hot = torch.zeros_like(logits)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        output = logits * (1 - one_hot) + target_logits * one_hot
+        # feature re-scale
+        output *= self.s
+
+        return output
+
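For reference, these cosine-margin heads (ArcFace above, AdaCos and CosFace below) act as drop-in replacements for the plain linear classifier: during training they receive the bottleneck feature together with the label and return margin-penalized, re-scaled logits, while calling them without a label yields plain cosine logits for inference. A minimal, illustrative calling-convention sketch; the batch size and class count here are made up:

import torch
from modeling.layer.cosine_loss import ArcFace

head = ArcFace(num_features=2048, num_classes=751, s=30.0, m=0.50)
feat = torch.randn(16, 2048)                 # stand-in for bottleneck features
label = torch.randint(0, 751, (16,))

train_logits = head(feat, label)             # cos(theta + m) on the target class, then scaled by s
infer_logits = head(feat)                    # label=None -> raw cosine similarities
assert train_logits.shape == infer_logits.shape == (16, 751)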
+# class ArcCos(nn.Module):
+#     def __init__(self, in_features, out_features, s=30.0, m=0.50, bias=False):
+#         super(ArcCos, self).__init__()
+#         self.in_features = in_features
+#         self.out_features = out_features
+#         self.s = s
+#         self.m = m
+#         self.cos_m = math.cos(m)
+#         self.sin_m = math.sin(m)

+#         self.th = math.cos(math.pi - m)
+#         self.mm = math.sin(math.pi - m) * m

+#         self.weight = Parameter(torch.Tensor(out_features, in_features))
+#         if bias:
+#             self.bias = Parameter(torch.Tensor(out_features))
+#         else:
+#             self.register_parameter('bias', None)
+#         self.reset_parameters()

+#     def reset_parameters(self):
+#         nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+#         if self.bias is not None:
+#             fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
+#             bound = 1 / math.sqrt(fan_in)
+#             nn.init.uniform_(self.bias, -bound, bound)
+
+#     def forward(self, input, label):
+#         # fix for
+
+class AdaCos(nn.Module):
+    def __init__(self, num_features, num_classes, m=0.50):
+        super(AdaCos, self).__init__()
+        self.num_features = num_features
+        self.num_classes = num_classes
+        self.s = math.sqrt(2) * math.log(num_classes - 1)
+        self.m = m
+        self.W = Parameter(torch.FloatTensor(num_classes, num_features))
+        nn.init.xavier_uniform_(self.W)
+
+    def forward(self, input, label=None):
+        # normalize features
+        x = F.normalize(input)
+        # normalize weights
+        W = F.normalize(self.W)
+        # dot product
+        logits = F.linear(x, W)
+        if label is None:
+            return logits
+        # adaptively re-estimate the scale s from the current batch
+        theta = torch.acos(torch.clamp(logits, -1.0 + 1e-7, 1.0 - 1e-7))
+        one_hot = torch.zeros_like(logits)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        with torch.no_grad():
+            B_avg = torch.where(one_hot < 1, torch.exp(self.s * logits), torch.zeros_like(logits))
+            B_avg = torch.sum(B_avg) / input.size(0)
+            theta_med = torch.median(theta[one_hot == 1])
+            self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med))
+        output = self.s * logits
+        return output
+
+class CosFace(nn.Module):
+    def __init__(self, num_features, num_classes, s=30.0, m=0.35):
+        super(CosFace, self).__init__()
+        self.num_features = num_features
+        self.num_classes = num_classes
+        self.s = s
+        self.m = m
+        self.W = Parameter(torch.FloatTensor(num_classes, num_features))
+        nn.init.xavier_uniform_(self.W)
+
+    def forward(self, input, label=None):
+        # normalize features
+        x = F.normalize(input)
+        # normalize weights
+        W = F.normalize(self.W)
+        # dot product
+        logits = F.linear(x, W)
+        if label is None:
+            return logits
+
+        # # * add margin version
+        # target_logits = logits - self.m
+        # one_hot = torch.zeros_like(logits)
+        # one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        # output = logits - (1 - one_hot) + target_logits * one_hot
+
+        # subtract the margin m from the target-class cosine only
+        one_hot = torch.zeros_like(logits)
+        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
+        output = logits - one_hot * self.m
+        # feature re-scale
+        output *= self.s
+
+        return output
+
+    def __repr__(self):
+        return self.__class__.__name__ + \
+            '(' + 'num_features=' + '{}'.format(self.num_features) + \
+            ', ' + 'num_classes=' + '{}'.format(self.num_classes) + \
+            ', ' + 's=' + str(self.s) + \
+            ', ' + 'm=' + str(self.m) + \
+            ')'
\ No newline at end of file
diff --git a/modeling/layer/functional.py b/modeling/layer/functional.py
new file mode 100644
index 0000000..48bd60b
--- /dev/null
+++ b/modeling/layer/functional.py
@@ -0,0 +1,9 @@
+import math
+import torch
+import torch.nn.functional as F
+
+def gem(x, p=3, eps=1e-6):
+    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1. / p)
+
+def adaptive_gem2d(x, output_size=(1, 1), p=3, eps=1e-6):
+    return F.adaptive_avg_pool2d(x.clamp(min=eps).pow(p), output_size).pow(1. / p)
\ No newline at end of file
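The `gem` functional above is what the new `GeM`/`AdaptiveGeM2d` modules in the next file call into: it is a power mean over the spatial grid, so p = 1 recovers average pooling and large p tends toward max pooling. A quick illustrative check, not part of the patch:

import torch
import torch.nn.functional as F
from modeling.layer.functional import gem

x = torch.rand(2, 2048, 16, 8)                            # toy post-ReLU feature map (non-negative)
avg = F.avg_pool2d(x, (16, 8))
mx = F.max_pool2d(x, (16, 8))

assert torch.allclose(gem(x, p=1), avg, atol=1e-5)        # p = 1 reduces to average pooling
g3 = gem(x, p=3)                                          # the default p used by GeM
assert torch.all((avg - 1e-5 <= g3) & (g3 <= mx + 1e-5))  # power mean: avg <= GeM <= max
print(g3.shape)                                           # torch.Size([2, 2048, 1, 1])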
diff --git a/modeling/layer/gem_pool.py b/modeling/layer/gem_pool.py
deleted file mode 100644
index c0d0243..0000000
--- a/modeling/layer/gem_pool.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# encoding: utf-8
-
-import torch
-from torch import nn
-
-
-class GeneralizedMeanPooling(nn.Module):
-    r"""Applies a 2D power-average adaptive pooling over an input signal composed of several input planes.
-    The function computed is: :math:`f(X) = pow(sum(pow(X, p)), 1/p)`
-        - At p = infinity, one gets Max Pooling
-        - At p = 1, one gets Average Pooling
-    The output is of size H x W, for any input size.
-    The number of output features is equal to the number of input planes.
-    Args:
-        output_size: the target output size of the image of the form H x W.
-        Can be a tuple (H, W) or a single H for a square image H x H
-        H and W can be either a ``int``, or ``None`` which means the size will
-        be the same as that of the input.
-    """
-
-    def __init__(self, norm, output_size=1, eps=1e-6):
-        super(GeneralizedMeanPooling, self).__init__()
-        assert norm > 0
-        self.p = float(norm)
-        self.output_size = output_size
-        self.eps = eps
-
-    def forward(self, x):
-        x = x.clamp(min=self.eps).pow(self.p)
-        return torch.nn.functional.adaptive_avg_pool2d(x, self.output_size).pow(1. / self.p)
-
-    def __repr__(self):
-        return self.__class__.__name__ + '(' \
-            + str(self.p) + ', ' \
-            + 'output_size=' + str(self.output_size) + ')'
-
-
-class GeneralizedMeanPoolingP(GeneralizedMeanPooling):
-    """ Same, but norm is trainable
-    """
-    def __init__(self, norm=3, output_size=1, eps=1e-6):
-        super(GeneralizedMeanPoolingP, self).__init__(norm, output_size, eps)
-        self.p = nn.Parameter(torch.ones(1) * norm)
\ No newline at end of file
diff --git a/modeling/layer/pooling.py b/modeling/layer/pooling.py
new file mode 100644
index 0000000..465000f
--- /dev/null
+++ b/modeling/layer/pooling.py
@@ -0,0 +1,87 @@
+# encoding: utf-8
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn.parameter import Parameter
+
+from .functional import gem, adaptive_gem2d
+
+class GeM(nn.Module):
+    def __init__(self, p=3.0, eps=1e-6, freeze_p=True):
+        super(GeM, self).__init__()
+        # keep p as a plain float when frozen, otherwise learn it as a parameter
+        self.p = p if freeze_p else Parameter(torch.ones(1) * p)
+        self.eps = eps
+        self.freeze_p = freeze_p
+
+    def forward(self, x):
+        return gem(x, p=self.p, eps=self.eps)
+
+    def __repr__(self):
+        if isinstance(self.p, float):
+            p = self.p
+        else:
+            p = self.p.data.tolist()[0]
+        return self.__class__.__name__ + \
+            '(' + 'p=' + '{:.4f}'.format(p) + \
+            ', ' + 'eps=' + str(self.eps) + \
+            ', ' + 'freeze_p=' + str(self.freeze_p) + \
+            ')'
+
+class AdaptiveGeM2d(nn.Module):
+    def __init__(self, output_size=(1, 1), p=3.0, eps=1e-6, freeze_p=True):
+        super(AdaptiveGeM2d, self).__init__()
+        self.output_size = output_size
+        self.p = p if freeze_p else Parameter(torch.ones(1) * p)
+        self.eps = eps
+        self.freeze_p = freeze_p
+
+    def forward(self, x):
+        return adaptive_gem2d(x, self.output_size, p=self.p, eps=self.eps)
+
+    def __repr__(self):
+        if isinstance(self.p, float):
+            p = self.p
+        else:
+            p = self.p.data.tolist()[0]
+        return self.__class__.__name__ + \
+            '(' + 'output_size=' + '{}'.format(self.output_size) + \
+            ', ' + 'p=' + '{:.4f}'.format(p) + \
+            ', ' + 'eps=' + str(self.eps) + \
+            ', ' + 'freeze_p=' + str(self.freeze_p) + \
+            ')'
+
+# ? legacy code
+# class GeneralizedMeanPooling(nn.Module):
+# """Applies a 2D power-average adaptive pooling over an input signal composed of several input planes.
+# The function computed is: :math:`f(X) = pow(sum(pow(X, p)), 1/p)`
+# - At p = infinity, one gets Max Pooling
+# - At p = 1, one gets Average Pooling
+# The output is of size H x W, for any input size.
+# The number of output features is equal to the number of input planes.
+# Args:
+# output_size: the target output size of the image of the form H x W.
+# Can be a tuple (H, W) or a single H for a square image H x H +# H and W can be either a ``int``, or ``None`` which means the size will +# be the same as that of the input. +# """ + +# def __init__(self, norm, output_size=1, eps=1e-6): +# super(GeneralizedMeanPooling, self).__init__() +# assert norm > 0 +# self.p = float(norm) +# self.output_size = output_size +# self.eps = eps + +# def forward(self, x): +# x = x.clamp(min=self.eps).pow(self.p) +# return torch.nn.functional.adaptive_avg_pool2d(x, self.output_size).pow(1. / self.p) + +# def __repr__(self): +# return self.__class__.__name__ + '(' \ +# + str(self.p) + ', ' \ +# + 'output_size=' + str(self.output_size) + ')' + + +# class GeneralizedMeanPoolingP(GeneralizedMeanPooling): +# """ Same, but norm is trainable +# """ +# def __init__(self, norm=3, output_size=1, eps=1e-6): +# super(GeneralizedMeanPoolingP, self).__init__(norm, output_size, eps) +# self.p = nn.Parameter(torch.ones(1) * norm) \ No newline at end of file diff --git a/tools/train.py b/tools/train.py index f84931a..48ca5a5 100644 --- a/tools/train.py +++ b/tools/train.py @@ -84,7 +84,7 @@ def do_train( trainer = create_supervised_trainer(model, optimizer, criterion, cfg.SOLVER.CENTER_LOSS_WEIGHT, device=device) evaluator = create_supervised_evaluator(model, metrics={'r1_mAP_mINP': r1_mAP_mINP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)}, device=device) - checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.NAME, checkpoint_period, n_saved=10, require_empty=False) + checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.BACKBONE, checkpoint_period, n_saved=10, require_empty=False) trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model, 'optimizer': optimizer['model'], 'center_param': criterion['center'], diff --git a/utils/re_ranking_batch.py b/utils/re_ranking_batch.py new file mode 100644 index 0000000..0c7df2b --- /dev/null +++ b/utils/re_ranking_batch.py @@ -0,0 +1,253 @@ +import numpy as np +from scipy import sparse +import torch +import time +from tqdm import tqdm + +from evaluate import eval_func, euclidean_dist + +def calculate_V(initial_rank, all_feature_len, dis_i_qg, i, k1): + # dis_i_qg = euclidean_dist(torch.tensor([all_feature[i].numpy()]), all_feature).numpy() + + forward_k_neigh_index = initial_rank[i, :k1 + 1] + # print(forward_k_neigh_index) + backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] + + fi = np.where(backward_k_neigh_index == i)[0] + k_reciprocal_index = forward_k_neigh_index[fi] + k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2.)) + 1] + candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, + :int(np.around(k1 / 2.)) + 1] + fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] + candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] + if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2. / 3 * len( + candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index) + + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + # print(k_reciprocal_expansion_index) + weight = np.exp(-dis_i_qg[k_reciprocal_expansion_index]) + # print(weight) + V = np.zeros(( all_feature_len)).astype(np.float32) + V[k_reciprocal_expansion_index] = 1. 
* weight / np.sum(weight) + return V, k_reciprocal_expansion_index, weight + + +def re_ranking_batch(all_feature, q_num, k1, k2, lambda_value, len_slice=1000): + + # calculate (q+g)*(q+g) + initial_rank = np.zeros((len(all_feature), k1+1)).astype(np.int32) + + original_dist = np.zeros((q_num, len(all_feature))) + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(all_feature[i*len_slice:(i+1)*len_slice], all_feature).data.cpu().numpy() + initial_i_rank = np.argpartition(dis_i_qg, range(1, k1 + 1), ).astype(np.int32)[:, :k1 + 1] + initial_rank[i*len_slice:(i+1)*len_slice] = initial_i_rank + pbar.update(1) + # print(initial_rank[0]) + + end_time = time.time() + print("rank time : %s" % (end_time-s_time)) + + all_V = [] + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(all_feature[i * len_slice:(i + 1) * len_slice], all_feature).data.cpu().numpy() + for ks in range(dis_i_qg.shape[0]): + r_k = i*len_slice+ks + dis_i_qg[ks] = np.power(dis_i_qg[ks], 2).astype(np.float32) + dis_i_qg[ks] = 1. * dis_i_qg[ks] / np.max(dis_i_qg[ks]) + if r_k < q_num: + original_dist[r_k] = dis_i_qg[ks] + V ,k_reciprocal_expansion_index, weight = calculate_V(initial_rank, len(all_feature), dis_i_qg[ks], r_k, k1) + # if r_k == 0: + # print(k_reciprocal_expansion_index) + # print(weight) + # print(dis_i_qg[ks]) + all_V.append(sparse.csr_matrix(V)) + + pbar.update(1) + + all_V = sparse.vstack(all_V) + # print(all_V.getrow(0).toarray()) + end_time = time.time() + print("calculate V time : %s" % (end_time - s_time)) + # print(all_V.todense()[0]) + + all_V_qe = [] + s_time = time.time() + for i in range(len(all_feature)): + temp_V = np.zeros((k2, len(all_feature))) + for l, row_index in enumerate(initial_rank[i, :k2]): + temp_V[l, :] = all_V.getrow(row_index).toarray()[0] + + + V_qe = np.mean(temp_V, axis=0) + all_V_qe.append(sparse.csr_matrix(V_qe)) + all_V_qe = sparse.vstack(all_V_qe) + # print(all_V_qe.todense()[0]) + del all_V + end_time = time.time() + print("calculate V_qe time : %s" % (end_time - s_time)) + + invIndex = [] + for i in range(len(all_feature)): + invIndex.append(np.where(all_V_qe.getcol(i).toarray().transpose()[0] != 0)[0]) + jaccard_dist = np.zeros_like(original_dist, dtype=np.float32) + + for i in range(q_num): + temp_min = np.zeros(shape=[1, len(all_feature)], dtype=np.float32) + + indNonZero = np.where(all_V_qe.getrow(i).toarray()[0] != 0)[0] + + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + # print(indImages) + for j in range(len(indNonZero)): + # print(indNonZero[j]) + c = all_V_qe.getrow(i).getcol(indNonZero[j]).toarray()[0, 0] + # print(c) + # print(indImages[j]) + + t_min = np.zeros((indImages[j].shape[0])) + for kk in range(indImages[j].shape[0]): + temp_d = all_V_qe.getrow(indImages[j][kk]).getcol(indNonZero[j]).toarray()[0, 0] + t_min[kk] = np.minimum(c, temp_d) + # print(t_min) + + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + t_min + # temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], + # V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2. 
- temp_min) + # print(jaccard_dist[0]) + # print(original_dist[0]) + final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value + del original_dist + del all_V_qe + del jaccard_dist + final_dist = final_dist[:q_num, q_num:] + return final_dist + +def re_ranking_batch_gpu(all_feature, q_num, k1, k2, lambda_value, len_slice=1000): + + # calculate (q+g)*(q+g) + initial_rank = np.zeros((len(all_feature), k1+1)).astype(np.int32) + + original_dist = np.zeros((q_num, len(all_feature))) + gpu_features = all_feature.cuda() + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(gpu_features[i*len_slice:(i+1)*len_slice], gpu_features).data.cpu().numpy() + initial_i_rank = np.argpartition(dis_i_qg, range(1, k1 + 1), ).astype(np.int32)[:, :k1 + 1] + initial_rank[i*len_slice:(i+1)*len_slice] = initial_i_rank + pbar.update(1) + # print(initial_rank[0]) + + end_time = time.time() + print("rank time : %s" % (end_time-s_time)) + + all_V = [] + + s_time = time.time() + + n_iter = len(all_feature) // len_slice + int(len(all_feature) % len_slice > 0) + + + with tqdm(total=n_iter) as pbar: + for i in range(n_iter): + dis_i_qg = euclidean_dist(gpu_features[i * len_slice:(i + 1) * len_slice], gpu_features).data.cpu().numpy() + for ks in range(dis_i_qg.shape[0]): + r_k = i*len_slice+ks + dis_i_qg[ks] = np.power(dis_i_qg[ks], 2).astype(np.float32) + dis_i_qg[ks] = 1. * dis_i_qg[ks] / np.max(dis_i_qg[ks]) + if r_k < q_num: + original_dist[r_k] = dis_i_qg[ks] + V ,k_reciprocal_expansion_index, weight = calculate_V(initial_rank, len(all_feature), dis_i_qg[ks], r_k, k1) + # if r_k == 0: + # print(k_reciprocal_expansion_index) + # print(weight) + # print(dis_i_qg[ks]) + all_V.append(sparse.csr_matrix(V)) + + pbar.update(1) + + all_V = sparse.vstack(all_V) + # print(all_V.getrow(0).toarray()) + end_time = time.time() + print("calculate V time : %s" % (end_time - s_time)) + # print(all_V.todense()[0]) + + all_V_qe = [] + s_time = time.time() + for i in range(len(all_feature)): + temp_V = np.zeros((k2, len(all_feature))) + for l, row_index in enumerate(initial_rank[i, :k2]): + temp_V[l, :] = all_V.getrow(row_index).toarray()[0] + + + V_qe = np.mean(temp_V, axis=0) + all_V_qe.append(sparse.csr_matrix(V_qe)) + all_V_qe = sparse.vstack(all_V_qe) + # print(all_V_qe.todense()[0]) + del all_V + end_time = time.time() + print("calculate V_qe time : %s" % (end_time - s_time)) + + invIndex = [] + for i in range(len(all_feature)): + invIndex.append(np.where(all_V_qe.getcol(i).toarray().transpose()[0] != 0)[0]) + jaccard_dist = np.zeros_like(original_dist, dtype=np.float32) + + with tqdm(total=q_num) as pbar: + for i in range(q_num): + temp_min = np.zeros(shape=[1, len(all_feature)], dtype=np.float32) + + indNonZero = np.where(all_V_qe.getrow(i).toarray()[0] != 0)[0] + + indImages = [] + indImages = [invIndex[ind] for ind in indNonZero] + # print(indImages) + for j in range(len(indNonZero)): + # print(indNonZero[j]) + c = all_V_qe.getrow(i).getcol(indNonZero[j]).toarray()[0, 0] + # print(c) + # print(indImages[j]) + + t_min = np.zeros((indImages[j].shape[0])) + for kk in range(indImages[j].shape[0]): + temp_d = all_V_qe.getrow(indImages[j][kk]).getcol(indNonZero[j]).toarray()[0, 0] + t_min[kk] = np.minimum(c, temp_d) + # print(t_min) + + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + t_min + # temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, 
indNonZero[j]], + # V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2. - temp_min) + pbar.update(1) + # print(jaccard_dist[0]) + # print(original_dist[0]) + final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value + del original_dist + del all_V_qe + del jaccard_dist + final_dist = final_dist[:q_num, q_num:] + return final_dist
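For context, here is a sketch of how the new batched re-ranking is intended to slot into evaluation. Feature extraction is assumed to have happened elsewhere, all tensors below are random placeholders, and the k1/k2/lambda values are the ones commonly used for k-reciprocal re-ranking rather than anything mandated by this patch; `re_ranking_batch_gpu` behaves the same but moves the distance computation onto the GPU.

import numpy as np
import torch

from evaluate import eval_func, euclidean_dist
from utils.re_ranking_batch import re_ranking_batch

# Placeholders for query/gallery features and metadata from the data loader.
qf, gf = torch.randn(10, 2048), torch.randn(50, 2048)
q_pids, g_pids = np.random.randint(0, 5, 10), np.random.randint(0, 5, 50)
q_camids, g_camids = np.zeros(10, dtype=int), np.ones(50, dtype=int)

# Plain Euclidean ranking.
distmat = euclidean_dist(qf, gf).numpy()
cmc, mAP, _ = eval_func(distmat, q_pids, g_pids, q_camids, g_camids)

# k-reciprocal re-ranking over the concatenated query+gallery set; the result
# is already sliced to shape (num_query, num_gallery).
all_feat = torch.cat([qf, gf], dim=0)
distmat_rr = re_ranking_batch(all_feat, qf.size(0), k1=20, k2=6, lambda_value=0.3, len_slice=1000)
cmc_rr, mAP_rr, _ = eval_func(distmat_rr, q_pids, g_pids, q_camids, g_camids)
print("mAP {:.3f} -> {:.3f} with re-ranking".format(mAP, mAP_rr))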