CNN/style.py

"""
Email: autuanliu@163.com
Date: 2018/9/25
Ref: https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
"""

import copy
import os

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from PIL import Image
from torch import nn, optim
from torchvision import models, transforms, utils

# Select the device the program runs on.
# The device string must not contain whitespace: torch.device rejects
# 'cuda: 0', so the GPU branch has to be spelled 'cuda:0'.
dev = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Work at a larger image size when a GPU is available, a smaller one on CPU.
imsize = 512 if torch.cuda.is_available() else 128
# Preprocessing applied to every loaded picture: resize to the working size,
# then convert the PIL image into a tensor.
tsfm = transforms.Compose(
    [
        transforms.Resize(size=imsize),
        transforms.ToTensor(),
    ]
)


def img_loader(img_name):
    """Load an image file as a batched float tensor on ``dev``.

    The picture is passed through ``tsfm``, given a leading batch dimension
    with ``unsqueeze(0)`` and moved to ``dev`` as ``torch.float``.

    Args:
        img_name: Path of the image file to load.

    Returns:
        The preprocessed image tensor with a batch dimension of size 1.

    Raises:
        FileNotFoundError: If ``img_name`` does not exist.
    """
    # Fail with a clear error instead of falling through to an unbound local.
    if not os.path.exists(img_name):
        raise FileNotFoundError(f'image file not found: {img_name}')
    # The context manager closes the underlying file once the pixel data has
    # been converted to a tensor.
    with Image.open(img_name) as image:
        tensor = tsfm(image).unsqueeze(0)
    return tensor.to(dev, torch.float)


# Load the style picture first, then the content picture, through img_loader.
style_img, content_img = (
    img_loader(path) for path in ('images/picasso.jpg', 'images/dancing.jpg')
)
# Both tensors must have exactly the same shape.
assert style_img.shape == content_img.shape, "we need to import style and content images of the same size"
# Converter used to turn tensors back into PIL images for display.
unloader = transforms.ToPILImage()


def imshow(tensor, title=None):
    """Display a batched image tensor with matplotlib.

    Args:
        tensor: Image tensor carrying a leading batch dimension of size 1.
        title: Optional figure title; nothing is set when it is ``None``.
    """
    # Work on a CPU clone so the caller's tensor is not modified, and drop the
    # fake batch dimension before converting to a PIL image.
    pil_image = unloader(tensor.cpu().clone().squeeze(0))
    plt.imshow(pil_image)
    if title is not None:
        plt.title(title)
    # Pause briefly so that the plot gets updated.
    plt.pause(0.001)


# Content loss definition
class ContentLoss(nn.Module):
    """Pass-through layer that records the MSE between its input and a target.

    The loss of the most recent forward pass is stored in ``self.loss``; the
    input itself is returned unchanged so the layer can sit inside a
    sequential model.
    """

    def __init__(self, target):
        super().__init__()
        # Detach the target so it is treated as a constant, not as part of
        # the autograd graph.
        self.target = target.detach()

    def forward(self, x):
        """Store the content loss for ``x`` and return ``x`` untouched."""
        self.loss = F.mse_loss(input=x, target=self.target)
        return x


def gram_matrix(x):
    """Return the normalized Gram matrix of a 4-D feature tensor.

    Args:
        x: Tensor with four dimensions, unpacked as
            (batch size, number of feature maps, height, width).

    Returns:
        A square matrix with ``batch * maps`` rows: the product of the
        flattened features with their transpose, divided by the total number
        of elements of ``x``.
    """
    batch, maps, height, width = x.shape
    # One row per feature map, one column per spatial position.
    flat = x.view(batch * maps, height * width)
    gram = flat.mm(flat.t())
    # Normalize by the element count of the input tensor.
    return gram / (batch * maps * height * width)


# Style loss definition
class StyleLoss(nn.Module):
    """Pass-through layer that records the MSE between Gram matrices.

    The Gram matrix of the target features is computed once at construction
    time and detached; every forward pass compares it with the Gram matrix of
    the incoming features and stores the result in ``self.loss``.
    """

    def __init__(self, target):
        super().__init__()
        target_gram = gram_matrix(target)
        self.target = target_gram.detach()

    def forward(self, input):
        """Store the style loss for ``input`` and return ``input`` untouched."""
        current_gram = gram_matrix(input)
        self.loss = F.mse_loss(input=current_gram, target=self.target)
        return input


# Model definition: only the ``features`` part of a pretrained VGG-19 is
# used, moved to the selected device and switched to evaluation mode.
cnn = models.vgg19(pretrained=True).features
cnn = cnn.to(dev).eval()
# VGG networks are trained on images with each channel normalized by
# mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]
mean1 = torch.tensor([0.485, 0.456, 0.406], device=dev)
std1 = torch.tensor([0.229, 0.224, 0.225], device=dev)


class Normal(nn.Module):
    """Normalize an image batch channel-wise as ``(x - mean) / std``.

    ``mean`` and ``std`` may be tensors or plain sequences with one value per
    channel. They are copied, reshaped to ``(C, 1, 1)`` so that they broadcast
    against inputs shaped ``(B, C, H, W)``, and registered as buffers so that
    they follow the module when it is moved with ``.to(...)``.
    """

    def __init__(self, mean, std):
        super().__init__()
        # torch.tensor(existing_tensor) emits a copy-construct warning;
        # as_tensor(...).clone().detach() makes the same independent copy for
        # both tensors and sequences without it.
        self.register_buffer('mean', torch.as_tensor(mean).clone().detach().view(-1, 1, 1))
        self.register_buffer('std', torch.as_tensor(std).clone().detach().view(-1, 1, 1))

    def forward(self, x):
        """Return ``x`` normalized with the stored mean and std."""
        return (x - self.mean) / self.std


# Names of the layers after which the content / style losses are computed.
content_layers_default = ['conv_4']
style_layers_default = [f'conv_{depth}' for depth in range(1, 6)]


def get_style_model_and_losses(cnn, normalization_mean, normalization_std, style_img, content_img, content_layers=content_layers_default, style_layers=style_layers_default):
    """Build a model with content/style loss layers inserted into a copy of ``cnn``.

    The layers of ``cnn`` are copied one by one into a new ``nn.Sequential``
    that starts with a normalization module. After every layer whose name is
    listed in ``content_layers`` / ``style_layers`` a ``ContentLoss`` /
    ``StyleLoss`` module is appended, with its target computed by running
    ``content_img`` / ``style_img`` through the model built so far. Layers
    after the last loss module are dropped.

    Args:
        cnn: Sequential network whose children are Conv2d, ReLU, MaxPool2d
            or BatchNorm2d layers. It is deep-copied, not modified.
        normalization_mean: Per-channel mean used by the normalization module.
        normalization_std: Per-channel std used by the normalization module.
        style_img: Image tensor supplying the style targets.
        content_img: Image tensor supplying the content targets.
        content_layers: Layer names (``conv_<n>`` etc.) that get a content loss.
        style_layers: Layer names that get a style loss.

    Returns:
        A tuple ``(model, style_losses, content_losses)``.

    Raises:
        RuntimeError: If ``cnn`` contains a layer of an unrecognized type.
    """
    cnn = copy.deepcopy(cnn)

    # Normalization module, built from the statistics passed by the caller
    # rather than from module-level globals.
    normalization = Normal(normalization_mean, normalization_std).to(dev)
    content_losses = []
    style_losses = []

    # assuming that cnn is a nn.Sequential, so we make a new nn.Sequential
    # to put in modules that are supposed to be activated sequentially
    model = nn.Sequential(normalization)

    i = 0  # increment every time we see a conv
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = f'conv_{i}'
        elif isinstance(layer, nn.ReLU):
            name = f'relu_{i}'
            # The in-place version doesn't play very nicely with the ContentLoss
            # and StyleLoss we insert below. So we replace with out-of-place
            # ones here.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = f'pool_{i}'
        elif isinstance(layer, nn.BatchNorm2d):
            name = f'bn_{i}'
        else:
            raise RuntimeError(f'Unrecognized layer: {layer.__class__.__name__}')

        model.add_module(name, layer)

        if name in content_layers:
            # add content loss:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module(f'content_loss_{i}', content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # add style loss:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module(f'style_loss_{i}', style_loss)
            style_losses.append(style_loss)

    # Now trim off the layers after the last content/style loss. A dedicated
    # index is used so the conv counter above is not clobbered. The model
    # always holds at least the normalization module, so the loop runs at
    # least once; with no loss module at all only that module is kept.
    for last in range(len(model) - 1, -1, -1):
        if isinstance(model[last], (ContentLoss, StyleLoss)):
            break
    model = model[:(last + 1)]
    return model, style_losses, content_losses


# The optimization starts from a copy of the content image, so the content
# image itself is not the tensor being updated.
input_img = torch.clone(content_img)


def get_input_optimizer(input_img):
    """Return an L-BFGS optimizer whose only parameter is ``input_img``.

    ``input_img`` is switched to require gradients in place, so the image
    itself is what gets optimized.
    """
    input_img.requires_grad_()
    return optim.LBFGS([input_img])


def run_style_transfer(cnn, normalization_mean, normalization_std, content_img, style_img, input_img, num_steps=300, style_weight=1000000, content_weight=1):
    """Run the style transfer optimization on ``input_img``.

    A loss-instrumented model is built with ``get_style_model_and_losses`` and
    ``input_img`` is optimized in place with L-BFGS to minimize
    ``style_weight * sum(style losses) + content_weight * sum(content losses)``.

    Args:
        cnn: Feature-extraction network handed to ``get_style_model_and_losses``.
        normalization_mean: Per-channel mean for input normalization.
        normalization_std: Per-channel std for input normalization.
        content_img: Image tensor providing the content targets.
        style_img: Image tensor providing the style targets.
        input_img: Image tensor to optimize; modified in place.
        num_steps: Optimizer steps are taken while the number of closure
            evaluations so far is ``<= num_steps``.
        style_weight: Multiplier applied to the summed style losses.
        content_weight: Multiplier applied to the summed content losses.

    Returns:
        ``input_img``, clamped to the range [0, 1].
    """
    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, normalization_mean, normalization_std, style_img, content_img)
    # Only the input image is optimized. Freezing the weights of the model
    # (built from a deep copy of ``cnn``) keeps backward() from computing and
    # accumulating gradients that are never used.
    for param in model.parameters():
        param.requires_grad_(False)
    optimizer = get_input_optimizer(input_img)

    step = 0  # number of closure evaluations so far

    def closure():
        nonlocal step
        # Correct the values of the updated input image; no_grad keeps the
        # in-place clamp out of the autograd graph without going through .data.
        with torch.no_grad():
            input_img.clamp_(0, 1)
        optimizer.zero_grad()
        # The forward pass is run for its side effect: every loss module
        # stores its current value in ``.loss``.
        model(input_img)
        style_score = style_weight * sum(sl.loss for sl in style_losses)
        content_score = content_weight * sum(cl.loss for cl in content_losses)
        loss = style_score + content_score
        loss.backward()
        step += 1
        if step % 50 == 0:
            print(f'run {step}\nStyle Loss : {style_score.item():4f} Content Loss: {content_score.item():4f}')
        return loss

    # L-BFGS may evaluate the closure several times per step() call.
    while step <= num_steps:
        optimizer.step(closure)
    # a last correction...
    with torch.no_grad():
        input_img.clamp_(0, 1)
    return input_img


# Run the optimization starting from input_img.
output = run_style_transfer(cnn, mean1, std1, content_img, style_img, input_img)

# Show the style image, the content image and the result in separate figures.
plt.figure(1)
imshow(style_img, 'Style Image')

plt.figure(2)
imshow(content_img, 'Content Image')

plt.figure(3)
imshow(output, None)
plt.show()

# Save the result (Pillow variant)
# image1 = output.cpu().clone().squeeze(0)
# image1 = unloader(image1)
# image1.save('images/res.jpg', 'JPEG')

# Save the result (torchvision variant)
utils.save_image(output, 'images/res.jpg')