# cifar_training.py

# -*- coding: utf-8 -*-
"""CIFAR_training.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1XcUVE2Bt2CVgmD9uHwATsBJ40MC9w0PC
"""

# Dataset selection: swap which of the two lines below is active to change it.
datasetName = "CIFAR-100"
# datasetName = "CIFAR-10"

# Number of target classes for every dataset name this script knows about.
_NUM_CLASSES_BY_DATASET = {"CIFAR-100": 100, "CIFAR-10": 10}
if datasetName not in _NUM_CLASSES_BY_DATASET:
    # Fail here with a clear message; otherwise `tc` would simply stay
    # undefined and the first use of it would raise a NameError.
    raise ValueError("Unsupported datasetName: %r" % (datasetName,))

# `tc` is the class count; it is used as the default `num_classes` of ResNet.
tc = _NUM_CLASSES_BY_DATASET[datasetName]

import torch
import torchvision
import torchvision.transforms as transforms
import pickle

# Per-channel mean / standard deviation used to standardize the input images.
_CHANNEL_MEAN = [0.4914, 0.4824, 0.4467]
_CHANNEL_STD = [0.2471, 0.2435, 0.2616]
normalize = transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD)

# Evaluation pipeline: tensor conversion followed by normalization, nothing else.
test_transforms = transforms.Compose([transforms.ToTensor(), normalize])

# Training pipeline: random 32x32 crop (after 4 px padding) and random
# horizontal flip, then the evaluation pipeline as the final stage.
train_transforms = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        test_transforms,
    ]
)

# Mini-batch size used for both the train and the test DataLoader.
batch_size = 64

# Fix the PyTorch seeds (CPU generator, then every CUDA device).
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
#random.seed(0)

import math

import torch
from torch import nn

from torchvision.models.resnet import conv3x3

class BasicBlock(nn.Module):
    """Residual block made of two ``conv3x3`` + BatchNorm stages.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second uses the default).
        downsample: optional module applied to the input to build the
            shortcut; when ``None`` the input itself is the shortcut.
    """

    # Channel multiplier between `planes` and the block's output width.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Sub-modules are created in the same order as before so that
        # parameter names and random initialization are unaffected.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.downsample = downsample
        # Stored for reference only; forward() does not read it.
        self.stride = stride

    def forward(self, x):
        """Return ``relu(shortcut(x) + conv_branch(x))``."""
        # Shortcut path: identity unless a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Convolutional path always starts from the untouched input.
        branch = self.relu1(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        return self.relu2(shortcut + branch)


class DownsampleB(nn.Module):
    """Parameter-free shortcut: average-pool, then widen channels with zeros.

    Args:
        nIn: number of input channels.
        nOut: requested number of output channels; the output actually has
            ``nIn * (nOut // nIn)`` channels.
        stride: kernel size (and therefore stride) of the average pooling.
    """

    def __init__(self, nIn, nOut, stride):
        super().__init__()
        self.avg = nn.AvgPool2d(stride)
        # How many channel-sized groups the output consists of.
        self.expand_ratio = nOut // nIn

    def forward(self, x):
        """Pool ``x`` spatially and append zero-valued channel groups."""
        pooled = self.avg(x)
        # One zero tensor (same shape/dtype/device as `pooled`) is reused for
        # every extra channel group.
        zero_fill = pooled.mul(0)
        pieces = [pooled]
        pieces.extend(zero_fill for _ in range(self.expand_ratio - 1))
        return torch.cat(pieces, 1)


class ResNet(nn.Module):
    """Small ResNet for CIFAR & SVHN.

    The network is a 3x3 stem convolution followed by three stages of
    ``(depth - 2) // 6`` blocks with 16, 32 and 64 planes, an 8x8 average
    pooling and a linear classifier.

    Args:
        depth: total depth; must satisfy ``depth == 6N + 2``.
        block: block class used to build the stages.
        initial_stride: stride of the stem convolution.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, depth=32, block=BasicBlock, initial_stride=1, num_classes=tc):
        assert (depth - 2) % 6 == 0, 'depth should be one of 6N+2'
        super().__init__()
        blocks_per_stage = (depth - 2) // 6

        # Width of the tensor entering the next stage; updated by _make_layer.
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=initial_stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, blocks_per_stage)
        self.layer2 = self._make_layer(block, 32, blocks_per_stage, stride=2)
        self.layer3 = self._make_layer(block, 64, blocks_per_stage, stride=2)
        # Fixed 8x8 pooling: with 32x32 inputs and initial_stride=1 the two
        # stride-2 stages leave an 8x8 map, which this reduces to 1x1.
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # Re-initialize conv weights as N(0, sqrt(2 / (kh * kw * out_channels)))
        # and reset BatchNorm to scale 1 / shift 0. The linear layer keeps its
        # default initialization.
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def _make_layer(self, block, planes, num_blocks, stride=1):
        """Build one stage of ``num_blocks`` blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion

        # Only the first block may change resolution or width, so only it can
        # need a non-identity shortcut.
        needs_shortcut = stride != 1 or self.inplanes != out_planes
        downsample = DownsampleB(self.inplanes, out_planes, stride) if needs_shortcut else None

        stage = [block(self.inplanes, planes, stride, downsample=downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, num_blocks))

        return nn.Sequential(*stage)

    @property
    def classifier(self):
        """The final linear layer."""
        return self.fc

    @property
    def num_classes(self):
        """Number of outputs of the classifier (rows of ``fc.weight``)."""
        return self.fc.weight.size(-2)

    @property
    def num_features(self):
        """Number of inputs of the classifier (columns of ``fc.weight``)."""
        return self.fc.weight.size(-1)

    def extract_features(self, x):
        """Run everything except the classifier and flatten per sample."""
        stem = self.relu(self.bn1(self.conv1(x)))
        staged = self.layer3(self.layer2(self.layer1(stem)))
        pooled = self.avgpool(staged)
        return pooled.view(pooled.size(0), -1)

    def forward(self, x):
        """Return the classifier outputs for the batch ``x``."""
        features = self.extract_features(x)
        return self.fc(features)

import torch.optim as optim

def train(model, trainloader, num_epochs=2, *, lr=0.1, wd=1e-4, momentum=0.9,
          lr_drops=(0.5, 0.75)):
    """Train ``model`` in place with SGD and a step-wise learning-rate decay.

    The model (and every batch) is moved to CUDA when CUDA is available.
    After each epoch one line ``[epoch] loss: x`` is printed, where ``x`` is
    the SUM of the batch losses of that epoch (not their mean).

    Args:
        model: ``nn.Module`` to optimize; modified in place.
        trainloader: iterable yielding ``(inputs, targets)`` batches.
        num_epochs: number of passes over ``trainloader``.
        lr: initial learning rate.
        wd: weight decay passed to SGD.
        momentum: SGD momentum; Nesterov momentum is used whenever it is > 0.
        lr_drops: fractions of ``num_epochs`` at which the learning rate is
            multiplied by 0.1; ``None`` or empty disables the decay.

    Returns:
        None.
    """
    # Query CUDA once instead of once per batch.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()

    # Convert the relative drop positions into epoch indices.
    milestones = [int(lr_drop * num_epochs) for lr_drop in (lr_drops or [])]

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                weight_decay=wd,
                                momentum=momentum,
                                # SGD rejects nesterov=True with zero momentum.
                                nesterov=momentum > 0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1)

    for epoch in range(num_epochs):
        model.train()

        running_loss = 0.0
        for inputs, targets in trainloader:
            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            optimizer.zero_grad()

            # Forward pass and loss.
            loss = criterion(model(inputs), targets)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print('[%d] loss: %.3f' % (epoch + 1, running_loss))
        # The schedule is stepped once per epoch, after the optimizer steps.
        scheduler.step()

from torch.distributions import Categorical
      

from torch.nn.functional import cross_entropy

def getFilterIdx(filteredPrediction):
    """Return the list of positions whose entry in ``filteredPrediction`` equals 1."""
    return [position for position, flag in enumerate(filteredPrediction) if flag == 1]

def get_test_acc(model, testloader):
    """Evaluate ``model`` on ``testloader`` and return aggregate metrics.

    The model is switched to evaluation mode for the duration of the call
    (so BatchNorm/Dropout behave deterministically and running statistics
    are not updated from test data) and its previous mode is restored
    afterwards. All metrics are averaged over SAMPLES, so a smaller final
    batch does not skew them.

    Args:
        model: ``nn.Module`` returning per-class scores of shape
            ``(batch, num_classes)``.
        testloader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(accuracy, loss, error, top5_error)`` of Python floats:
        top-1 accuracy, mean cross-entropy, top-1 error and top-k error with
        ``k = min(5, num_classes)``.

    Raises:
        ValueError: if ``testloader`` yields no samples.
    """
    # Run on the device the model already lives on; a model without
    # parameters falls back to "CUDA if available".
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    total = 0
    correct = 0
    loss_sum = 0.0
    top1_hits = 0
    topk_hits = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Sum (not mean) per batch so the final average is per sample.
                loss_sum += cross_entropy(outputs, labels, reduction="sum").item()

                # Never ask for more candidates than there are classes.
                k = min(5, outputs.size(-1))
                _, top_preds = outputs.topk(k, dim=-1, largest=True, sorted=True)
                hits = top_preds.eq(labels.unsqueeze(-1))
                top1_hits += hits[:, 0].sum().item()
                topk_hits += hits.sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        raise ValueError("testloader yielded no samples")

    accuracy = correct / total
    loss_tot = loss_sum / total
    error_tot = 1 - top1_hits / total
    top5_error_tot = 1 - topk_hits / total

    return accuracy, loss_tot, error_tot, top5_error_tot


import pickle 
import random

import numpy as np  # needed by the np.load / np.asarray calls below

# Pin GPU 0 only when CUDA exists, so the script also runs on CPU-only hosts.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
szlen = []
noisePerc = "20" #percentage corruption

# Per-sample arrays produced by an earlier step; file names are
# "<noisePerc>_<kind>_<datasetName>.npy" in the working directory.
noisy_labels = np.load(str(noisePerc) + "_NoisyLabels_" + datasetName + ".npy")   #load noisy labels
noisy_lvl = np.load(str(noisePerc) + "_NoiseLevels_" + datasetName +".npy")  # load noise levels
filteredPrediction = np.load(str(noisePerc) + "_NoiseLevelPrediction_" + datasetName +".npy")  #load filtered predictions 

# Use the torchvision dataset that matches datasetName instead of always
# loading CIFAR-100.
if datasetName == "CIFAR-100":
    dataset_cls = torchvision.datasets.CIFAR100
elif datasetName == "CIFAR-10":
    dataset_cls = torchvision.datasets.CIFAR10
else:
    raise ValueError("Unsupported datasetName: %r" % (datasetName,))

trainset = dataset_cls(root='./data', train=True,
                       download=True, transform=train_transforms)
testset = dataset_cls(root='./data', train=False,
                      download=True, transform=test_transforms)

# The datasets must exist before the noisy labels can be compared with the
# dataset's own targets: 1 where they agree, 0 where the label was corrupted.
grn_truth = np.array(noisy_labels == np.asarray(trainset.targets), dtype=int)
print("Number of mislabelled: ", len(grn_truth) - grn_truth.sum(), "out of", len(grn_truth))

# NOTE(review): noisy_labels is only compared against trainset.targets and is
# never written into trainset, so training below uses the dataset's own
# targets - confirm that this is intended.

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                            shuffle=False, num_workers=2)

# Train only on the samples whose filtered prediction equals 1.
filterIdx = getFilterIdx(filteredPrediction)
train_now = torch.utils.data.dataset.Subset(trainset, filterIdx)
szlen.append(len(train_now.indices))

model = ResNet()
trainloader = torch.utils.data.DataLoader(train_now, batch_size=batch_size,shuffle=True, num_workers=2)
train(model, trainloader, 300)
accuracy, loss_tot, error_tot, top5_error_tot = get_test_acc(model, testloader)
print(accuracy, loss_tot, error_tot, top5_error_tot)
print()
torch.save(model.state_dict(), noisePerc + '_' + datasetName + '_' + 'Filter')