Update residual-layers.lua to be consistent with other torch nn models #6

Open · wants to merge 3 commits into master
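This PR replaces addResidualLayer2(input, nChannels, nOutChannels, stride), which spliced the residual block directly into an nngraph node graph, with ResidualLayer(nChannels, nOutChannels, stride), which returns a standalone module built from nn.Sequential and nn.ConcatTable, so the block composes like any other torch nn model. A minimal usage sketch, not part of the PR, assuming the patched residual-layers.lua together with cudnn, cunn and nninit is installed on a CUDA-capable machine:

-- Usage sketch (assumes the patched residual-layers.lua from this PR).
require 'cudnn'
require 'cunn'
dofile 'residual-layers.lua'          -- defines the global ResidualLayer(...)

local model = nn.Sequential()
model:add(cudnn.SpatialConvolution(3, 16, 3,3, 1,1, 1,1))
model:add(ResidualLayer(16))          -- 16 -> 16 channels, stride 1
model:add(ResidualLayer(16, 32, 2))   -- 16 -> 32 channels, downsample by 2
model = model:cuda()

local y = model:forward(torch.randn(8, 3, 32, 32):cuda())
print(#y)                             -- expected: 8 x 32 x 16 x 16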
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@ snapshots
 mnist.hdf5
 *.swp
 cache
+*.bak
38 changes: 22 additions & 16 deletions residual-layers.lua
@@ -24,7 +24,7 @@ require 'cudnn'
 require 'cunn'
 local nninit = require 'nninit'

-function addResidualLayer2(input, nChannels, nOutChannels, stride)
+function ResidualLayer(nChannels, nOutChannels, stride)
 --[[

 Residual layers! Implements option (A) from Section 3.3. The input
@@ -50,46 +50,52 @@ function addResidualLayer2(input, nChannels, nOutChannels, stride)
 stride = stride or 1
 -- Path 1: Convolution
 -- The first layer does the downsampling and the striding
-local net = cudnn.SpatialConvolution(nChannels, nOutChannels,
+local path1 = nn.Sequential()
+path1:add(cudnn.SpatialConvolution(nChannels, nOutChannels,
 3,3, stride,stride, 1,1)
 :init('weight', nninit.kaiming, {gain = 'relu'})
-:init('bias', nninit.constant, 0)(input)
-net = cudnn.SpatialBatchNormalization(nOutChannels)
+:init('bias', nninit.constant, 0))
+path1:add(cudnn.SpatialBatchNormalization(nOutChannels)
 :init('weight', nninit.normal, 1.0, 0.002)
-:init('bias', nninit.constant, 0)(net)
-net = cudnn.ReLU(true)(net)
-net = cudnn.SpatialConvolution(nOutChannels, nOutChannels,
+:init('bias', nninit.constant, 0))
+path1:add(cudnn.ReLU(true))
+path1:add(cudnn.SpatialConvolution(nOutChannels, nOutChannels,
 3,3, 1,1, 1,1)
 :init('weight', nninit.kaiming, {gain = 'relu'})
-:init('bias', nninit.constant, 0)(net)
+:init('bias', nninit.constant, 0))
 -- Should we put Batch Normalization here? I think not, because
 -- BN would force the output to have unit variance, which breaks the residual
 -- property of the network.
 -- What about ReLU here? I think maybe not for the same reason. Figure 2
 -- implies that they don't use it here
+path1:add(cudnn.SpatialBatchNormalization(nOutChannels))

 -- Path 2: Identity / skip connection
-local skip = input
+local path2 = nn.Sequential()
+path2:add(nn.Identity())
 if stride > 1 then
 -- optional downsampling
-skip = nn.SpatialAveragePooling(1, 1, stride,stride)(skip)
+path2:add(nn.SpatialAveragePooling(1, 1, stride,stride))
 end
 if nOutChannels > nChannels then
 -- optional padding
-skip = nn.Padding(1, (nOutChannels - nChannels), 3)(skip)
+path2:add(nn.Padding(1, (nOutChannels - nChannels), 3))
 elseif nOutChannels < nChannels then
 -- optional narrow, ugh.
-skip = nn.Narrow(2, 1, nOutChannels)(skip)
+path2:add(nn.Narrow(2, 1, nOutChannels))
 -- NOTE this BREAKS with non-batch inputs!!
 end

 -- Add them together
-net = cudnn.SpatialBatchNormalization(nOutChannels)(net)
-net = nn.CAddTable(){net, skip}
-net = cudnn.ReLU(true)(net)
+local layer = nn.Sequential()
+local concat = nn.ConcatTable(2)
+concat:add(path1):add(path2)
+layer:add(concat)
+layer:add(nn.CAddTable())
+layer:add(cudnn.ReLU(true))
 -- ^ don't put a ReLU here! see http://gitxiv.com/comments/7rffyqcPLirEEsmpX

-return net
+return layer
 end

 --[[
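The container wiring above follows the standard torch nn pattern for a residual block: nn.ConcatTable feeds the same input to both the convolutional path (path1) and the skip path (path2), nn.CAddTable sums the two outputs elementwise, and everything is wrapped in one nn.Sequential. A toy CPU-only illustration of that pattern, simplified (identity skip, no batch normalization or weight init) and not the PR's exact layer:

-- Toy residual block showing the ConcatTable/CAddTable pattern (plain nn).
require 'nn'

local function toyResidual(nChannels)
   local path1 = nn.Sequential()
   path1:add(nn.SpatialConvolution(nChannels, nChannels, 3,3, 1,1, 1,1))
   path1:add(nn.ReLU(true))

   local concat = nn.ConcatTable()     -- sends the same input to both branches
   concat:add(path1)
   concat:add(nn.Identity())           -- skip connection

   local layer = nn.Sequential()
   layer:add(concat)
   layer:add(nn.CAddTable())           -- elementwise sum of the two branches
   layer:add(nn.ReLU(true))
   return layer
end

local block = toyResidual(4)
print(#block:forward(torch.randn(2, 4, 8, 8)))   -- same shape as the input: 2 x 4 x 8 x 8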
10 changes: 5 additions & 5 deletions train-cifar.lua
@@ -71,13 +71,13 @@ if opt.loadFrom == "" then
 model = cudnn.SpatialBatchNormalization(16)(model)
 model = cudnn.ReLU(true)(model)
 ------> 16, 32,32 First Group
-for i=1,N do model = addResidualLayer2(model, 16) end
+for i=1,N do model = ResidualLayer(16)(model) end
 ------> 32, 16,16 Second Group
-model = addResidualLayer2(model, 16, 32, 2)
-for i=1,N-1 do model = addResidualLayer2(model, 32) end
+model = ResidualLayer(16, 32, 2)(model)
+for i=1,N-1 do model = ResidualLayer(32)(model) end
 ------> 64, 8,8 Third Group
-model = addResidualLayer2(model, 32, 64, 2)
-for i=1,N-1 do model = addResidualLayer2(model, 64) end
+model = ResidualLayer(32, 64, 2)(model)
+for i=1,N-1 do model = ResidualLayer(64)(model) end
 ------> 10, 8,8 Pooling, Linear, Softmax
 model = nn.SpatialAveragePooling(8,8)(model)
 model = nn.Reshape(64)(model)
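Note on the train-cifar.lua call sites above: ResidualLayer(...) now returns an nn module instead of splicing itself into the graph, and train-cifar.lua still builds its model with nngraph, so each block is applied to the current graph node as ResidualLayer(16)(model). That works because nngraph lets any nn.Module instance be called on a node, wrapping it as a new node. A small sketch of that convention with a toy module (assumes nngraph is installed; not from the PR):

-- nngraph call convention: module(node) wraps the module as a graph node.
require 'nngraph'

local input = nn.Identity()()                              -- graph input node
local node  = nn.SpatialConvolution(3, 16, 3,3, 1,1, 1,1)(input)
node = nn.ReLU(true)(node)                                 -- same style as ResidualLayer(16)(model)
local g = nn.gModule({input}, {node})

print(#g:forward(torch.randn(2, 3, 8, 8)))                 -- 2 x 16 x 8 x 8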