-- main.lua

--
-- An implementation of the method described in 'A Neural Algorithm of Artistic
-- Style' by Leon Gatys, Alexander Ecker, and Matthias Bethge.
--
-- http://arxiv.org/abs/1508.06576
--

require 'torch'
require 'cutorch'
require 'nn'
require 'cunn'
require 'image'
require 'paths'
require 'optim'
local pl = require('pl.import_into')()
local printf = pl.utils.printf

paths.dofile('inception.lua')
paths.dofile('images.lua')

-- Command-line interface: option names, defaults and help strings.
local cmd = torch.CmdLine()
cmd:text()
cmd:text('A Neural Algorithm of Artistic Style')
cmd:text()
cmd:text('Options:')
cmd:option('--style',           'none',  'Path to style image')
cmd:option('--content',         'none',  'Path to content image')
cmd:option('--style_factor',     5e9,    'Trade-off factor between style and content')
cmd:option('--num_iters',        500,    'Number of iterations')
cmd:option('--size',             500,    'Length of image long edge (0 to use original content size)')
cmd:option('--display_interval', 20,     'Iterations between image displays (0 to suppress display)')
cmd:option('--smoothness',       6e-3,   'Total variation norm regularization strength (higher for smoother output)')
cmd:option('--init',            'image', '{image, random}. Initialization mode for optimized image.')
cmd:option('--backend',         'cunn',  '{cunn, cudnn}. Neural network CUDA backend.')
cmd:option('--optimizer',       'sgd',   '{sgd, lbfgs}. Optimization algorithm.')
local opt = cmd:parse(arg)

-- a non-positive size is passed on as nil (the help text describes 0 as
-- "use original content size")
if opt.size <= 0 then
    opt.size = nil
end

-- Fail fast on missing or mistyped image paths. Without this check the 'none'
-- defaults would only be rejected by image.load, after the model has already
-- been loaded.
for _, key in ipairs({'style', 'content'}) do
    local path = opt[key]
    if not paths.filep(path) then
        error(string.format(
            "--%s: image file '%s' does not exist", key, tostring(path)))
    end
end

-- Shared squared-error criterion used by the style and content losses.
-- sizeAverage = false keeps the sum of squared differences rather than their
-- mean; the loss functions apply their own normalization.
local euclidean = nn.MSECriterion()
euclidean.sizeAverage = false
euclidean:cuda()

-- Gram matrix of a layer output.
--
-- The input is reshaped to k rows, where k = input:size(2), and multiplied by
-- its own transpose, giving a k x k matrix.
-- NOTE(review): presumably input is 1 x k x h x w (batch of one); with a larger
-- first dimension the rows of the view would no longer be single channels.
function gram(input)
    local features = input:view(input:size(2), -1)
    return torch.mm(features, features:t())
end

-- Snapshot features from the named layers of a model whose forward pass has
-- already been run.
--
-- model             container exposing its layers as model.modules
-- activation_layers set of layer names (keyed by module._name) whose raw
--                   outputs should be captured
-- gram_layers       set of layer names whose Gram matrices should be captured
--
-- Returns two tables keyed by layer name: flattened copies of the selected
-- outputs, and flattened Gram matrices of the selected outputs. Layers
-- without a _name are skipped.
function collect_activations(model, activation_layers, gram_layers)
    local activations = {}
    local grams = {}

    for _, layer in ipairs(model.modules) do
        local name = layer._name

        if name and activation_layers[name] then
            -- copy into a fresh 1-D tensor so that later forward passes
            -- cannot overwrite the stored values
            local out = layer.output
            activations[name] = out.new():resize(out:nElement()):copy(out)
        end

        if name and gram_layers[name] then
            grams[name] = gram(layer.output):view(-1)
        end
    end

    return activations, grams
end

-- Style loss and its gradient for one layer.
--
-- gen        layer output for the image being generated
-- orig_gram  flattened Gram matrix of the style image at the same layer
--
-- Returns the normalized loss and a gradient with the same size as gen.
function style_grad(gen, orig_gram)
    local channels = gen:size(2)
    local count = gen:nElement()
    local gen_gram = gram(gen)
    local flat_gram = gen_gram:view(-1)

    local loss = euclidean:forward(flat_gram, orig_gram)
    local gram_grad = euclidean:backward(flat_gram, orig_gram)
    gram_grad = gram_grad:view(gen_gram:size())

    -- normalization helps improve the appearance of the generated image:
    -- scale by the mean absolute Gram gradient times the squared element count
    local scale = torch.abs(gram_grad):mean() * (count * count)
    if scale > 0 then
        gram_grad:div(scale)
        loss = loss / scale
    end

    -- push the gradient from the Gram matrix back onto the layer output
    local features = gen:view(channels, -1)
    return loss, torch.mm(gram_grad, features):view(gen:size())
end

-- Content loss and its gradient for one layer.
--
-- gen   layer output for the image being generated
-- orig  flattened layer output recorded for the content image
--
-- Returns the loss and a gradient shaped like gen. Both are divided by the
-- mean absolute gradient whenever that value is positive.
function content_grad(gen, orig)
    local flat = gen:view(-1)
    local loss = euclidean:forward(flat, orig)
    local grad = euclidean:backward(flat, orig):view(gen:size())

    local scale = torch.abs(grad):mean()
    if scale > 0 then
        grad:div(scale)
        loss = loss / scale
    end

    return loss, grad
end

-- total variation gradient
--
-- For every position in the region that excludes the last row and last column
-- of the two trailing dimensions, takes the difference to the right-hand
-- neighbour and to the neighbour below. Each difference is added at the
-- position itself and subtracted at the neighbour, which is the gradient of
-- half the sum of those squared differences. Returns a tensor with the same
-- type and size as gen.
function total_var_grad(gen)
    local all = {}
    local head, tail = {1, -2}, {2, -1}
    local core  = {all, all, head, head}   -- reference positions
    local right = {all, all, head, tail}   -- shifted one column
    local below = {all, all, tail, head}   -- shifted one row

    local dx = gen[core] - gen[right]
    local dy = gen[core] - gen[below]

    local grad = gen.new():resize(gen:size()):zero()
    grad[core]:add(dx):add(dy)
    grad[right]:add(-1, dx)
    grad[below]:add(-1, dy)
    return grad
end

-- load model
local model = create_model('inception_caffe.th', opt.backend)
collectgarbage()

-- layers whose Gram matrices are matched against the style image, with their
-- relative weights
local style_weights = {
    ['conv1/7x7_s2'] = 1,
    ['conv2/3x3']    = 1,
    ['inception_3a'] = 1,
    ['inception_3b'] = 1,
    ['inception_4a'] = 1,
    ['inception_4b'] = 1,
    ['inception_4c'] = 1,
    ['inception_4d'] = 1,
    ['inception_4e'] = 1,
}

-- layers whose activations are matched against the content image, with their
-- relative weights
local content_weights = {
    ['inception_3a'] = 1,
    ['inception_4a'] = 1,
}

-- total of all values in a layer-name -> weight table
local function sum_values(weights)
    local total = 0
    for _, weight in pairs(weights) do
        total = total + weight
    end
    return total
end

-- normalization factors: per-layer weights are divided by these sums
local style_weight_sum = sum_values(style_weights)
local content_weight_sum = sum_values(content_weights)

-- content image: run it through the model and keep the content-layer outputs
local img = preprocess(image.load(opt.content), opt.size):cuda()
model:forward(img)
local img_activations = collect_activations(model, content_weights, {})

-- style image: preprocessed with the content image's longer edge as its size
-- argument, run through the model, and reduced to style-layer Gram matrices
local long_edge = math.max(img:size(3), img:size(4))
local art = preprocess(image.load(opt.style), long_edge):cuda()
model:forward(art)
local art_grams = select(2, collect_activations(model, {}, style_weights))

-- only the Gram matrices are needed from here on; drop the reference
-- explicitly so the collector can reclaim the style image
art = nil
collectgarbage()

-- Objective callback handed to the optimizer.
--
-- Runs the model on input, then walks the layers from last to first. At each
-- named layer listed in content_weights / style_weights the corresponding
-- loss gradient is added to the running gradient before it is propagated
-- through that layer. The total variation gradient, scaled by
-- opt.smoothness, is added on the input side.
--
-- Returns the weighted loss and the gradient with respect to input,
-- flattened to one dimension. The total variation term contributes to the
-- gradient only, not to the returned loss.
function opfunc(input)
    -- forward prop
    model:forward(input)

    local layers = model.modules
    local total_loss = 0
    local grad = torch.CudaTensor(model.output:size()):zero()

    -- backpropagate layer by layer, injecting loss gradients on the way
    for idx = #layers, 1, -1 do
        local layer = layers[idx]
        local name = layer._name

        -- the first layer consumes the image itself, every other layer the
        -- output of its predecessor
        local layer_input
        if idx == 1 then
            layer_input = input
        else
            layer_input = layers[idx - 1].output
        end

        if name then
            -- content term
            local content_w = content_weights[name]
            if content_w then
                local c_loss, c_grad =
                    content_grad(layer.output, img_activations[name])
                local w = content_w / content_weight_sum
                total_loss = total_loss + w * c_loss
                grad:add(w, c_grad)
            end

            -- style term
            local style_w = style_weights[name]
            if style_w then
                local s_loss, s_grad =
                    style_grad(layer.output, art_grams[name])
                local w = opt.style_factor * style_w / style_weight_sum
                total_loss = total_loss + w * s_loss
                grad:add(w, s_grad)
            end
        end

        grad = layer:backward(layer_input, grad)
    end

    -- total variation regularization for denoising
    local tv = total_var_grad(input)
    grad:add(tv:mul(opt.smoothness))

    return total_loss, grad:view(-1)
end

-- image to optimize: either the content image itself (updated in place) or
-- Gaussian noise scaled by 0.1, shifted by 0.5 and clamped to [0, 1]
local input
if opt.init == 'random' then
    local noise = torch.randn(3, img:size(3), img:size(4))
    noise:mul(0.1):add(0.5):clamp(0, 1)
    input = preprocess(noise):cuda()
elseif opt.init == 'image' then
    input = img
else
    error('unrecognized initialization option: ' .. opt.init)
end

local timer = torch.Timer()

-- show the starting point
local output = depreprocess(input):double()
if opt.display_interval > 0 then
    image.display(output)
end

-- intermediate frames go into ./frames, created on demand; the starting
-- image is stored as frame 0
local frames_dir = 'frames'
if not paths.dirp(frames_dir) then
    paths.mkdir(frames_dir)
end
local first_frame = paths.concat(frames_dir, '0.jpg')
image.save(first_frame, output)

-- optimizer settings, keyed by the value of --optimizer
local optimizer_settings = {
    sgd = {
        learningRate = 0.1,
        momentum = 0.9,
        dampening = 0.0,
    },
    lbfgs = {
        maxIter = 3,
        learningRate = 1,
    },
}

local optim_state = optimizer_settings[opt.optimizer]
if not optim_state then
    error('unknown optimizer: ' .. opt.optimizer)
end

-- optimize
--
-- Each iteration performs one call of the selected optimizer on `input`
-- (updated in place), then optionally logs, saves a frame and displays the
-- current image.
for i = 1, opt.num_iters do
    local _, loss = optim[opt.optimizer](opfunc, input, optim_state)
    -- the optimizer returns a table of loss values; report the first entry
    loss = loss[1]

    -- anneal learning rate
    if opt.optimizer == 'sgd' and i % 100 == 0 then
        optim_state.learningRate = 0.75 * optim_state.learningRate
    end

    if i % 10 == 0 then
        printf('iter %5d\tloss %8.2e\tlr %8.2e\ttime %4.1f\n',
            i, loss, optim_state.learningRate, timer:time().real)
    end

    -- Frames are saved for each of the first 20 iterations, then every 5th,
    -- and always for the last iteration so the final result reaches disk.
    -- Display is decided independently of saving, so --display_interval
    -- values that are not multiples of 5 are honoured as well.
    local save_frame = i <= 20 or i % 5 == 0 or i == opt.num_iters
    local show_frame = opt.display_interval > 0
        and i % opt.display_interval == 0

    if save_frame or show_frame then
        output = depreprocess(input):double()
        if show_frame then
            image.display(output)
        end
        if save_frame then
            image.save(paths.concat(frames_dir, i .. '.jpg'), output)
        end
    end
end

-- final result; converted with :double() like every other display/save site
output = depreprocess(input):double()
if opt.display_interval > 0 then
    image.display(output)
end