forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request BVLC#3613 from longjon/py-coord-map
Python/net spec coordinate map and crop computation

* longjon/py-coord-map:
  - [pycaffe] test coord_map
  - [pycaffe] align coord_map and BVLC#3570 Crop layer
  - [pycaffe] document, style, and complete coord_map
  - [pycaffe] add coord_map.py for computing induced coordinate transform
- Loading branch information
Showing
2 changed files
with
374 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
""" | ||
Determine spatial relationships between layers to relate their coordinates. | ||
Coordinates are mapped from input-to-output (forward), but can | ||
be mapped output-to-input (backward) by the inverse mapping too. | ||
This helps crop and align feature maps among other uses. | ||
""" | ||
|
||
from __future__ import division | ||
import numpy as np | ||
from caffe import layers as L | ||
|
||
# Layer types that coord_map() treats as the identity coordinate mapping:
# they are reported with no axis, unit scale, and zero shift.
PASS_THROUGH_LAYERS = ['AbsVal', 'BatchNorm', 'Bias', 'BNLL', 'Dropout',
                       'Eltwise', 'ELU', 'Log', 'LRN', 'Exp', 'MVN', 'Power',
                       'ReLU', 'PReLU', 'Scale', 'Sigmoid', 'Split', 'TanH',
                       'Threshold']
|
||
|
||
def conv_params(fn):
    """
    Extract the spatial parameters that determine the coordinate mapping:
    kernel size, stride, padding, and dilation.

    Implementation detail: Convolution, Deconvolution, and Im2col layers
    define these in the convolution_param message, while Pooling has its
    own fields in pooling_param. The parameters may also be given flat, as
    direct entries of fn.params. This method deals with these details to
    extract canonical parameters.

    Take
        fn: layer function exposing its parameters as the dict fn.params

    Give
        (axis, stride, kernel extent, pad) where stride, kernel extent, and
        pad are 1-d numpy arrays and the kernel extent accounts for dilation:
        (kernel_size - 1) * dilation + 1.

    Raises AssertionError for the legacy _h/_w parameters and KeyError if no
    kernel_size is given.
    """
    # look inside the nested parameter message if there is one, otherwise
    # the parameters were given flat
    params = fn.params
    for message in ('convolution_param', 'pooling_param'):
        if message in fn.params:
            params = fn.params[message]
            break
    # reject legacy params at either level, and before kernel_size is read so
    # that a kernel_h/kernel_w-only spec reports this message, not a KeyError
    legacy = {'pad_h', 'pad_w', 'kernel_h', 'kernel_w', 'stride_h',
              'stride_w'}
    assert not (legacy & (set(fn.params) | set(params))), \
        'cropping does not support legacy _h/_w params'
    axis = params.get('axis', 1)
    ks = np.array(params['kernel_size'], ndmin=1)
    dilation = np.array(params.get('dilation', 1), ndmin=1)
    stride = np.array(params.get('stride', 1), ndmin=1)
    pad = np.array(params.get('pad', 0), ndmin=1)
    return axis, stride, (ks - 1) * dilation + 1, pad
|
||
|
||
def crop_params(fn):
    """
    Extract the crop layer parameters with defaults.

    The parameters are read from the crop_param message if fn.params has one
    and from fn.params itself otherwise.

    Give
        (axis, offset) where axis defaults to 2 and offset is a 1-d numpy
        array that defaults to [0].
    """
    params = fn.params.get('crop_param', fn.params)
    # default to spatial crop for N, C, H, W
    axis = params.get('axis', 2)
    offset = np.array(params.get('offset', 0), ndmin=1)
    return (axis, offset)
|
||
|
||
class UndefinedMapException(Exception):
    """
    Exception raised for layers that do not have a defined coordinate mapping.
    """
|
||
|
||
def coord_map(fn):
    """
    Define the coordinate mapping by its
    - axis
    - scale: output coord[i * scale] <- input_coord[i]
    - shift: output coord[i] <- output_coord[i + shift]
    s.t. the identity mapping, as for pointwise layers like ReLU, is defined by
    (None, 1, 0) since it is independent of axis and does not transform coords.

    Take
        fn: layer function with a type_name and the params of its layer

    Give
        (axis, scale, shift) coord map of the layer

    Raises UndefinedMapException if the layer type has no defined mapping.
    """
    layer_type = fn.type_name
    if layer_type in ['Convolution', 'Pooling', 'Im2col']:
        axis, stride, ks, pad = conv_params(fn)
        return axis, 1 / stride, (pad - (ks - 1) / 2) / stride
    if layer_type == 'Deconvolution':
        axis, stride, ks, pad = conv_params(fn)
        return axis, stride, (ks - 1) / 2 - pad
    if layer_type in PASS_THROUGH_LAYERS:
        return None, 1, 0
    if layer_type == 'Crop':
        axis, offset = crop_params(fn)
        axis -= 1  # -1 for last non-coordinate dim.
        return axis, 1, - offset
    # name the offending layer type so that failures can be diagnosed
    raise UndefinedMapException(
        'no coordinate mapping defined for layer type {}'.format(layer_type))
|
||
|
||
class AxisMismatchException(Exception):
    """
    Exception raised for mappings with incompatible axes.
    """
|
||
|
||
def compose(base_map, next_map):
    """
    Compose a base coord map with scale a1, shift b1 with a further coord map
    with scale a2, shift b2. The scales multiply and the further shift, b2,
    is scaled by base coord scale a1.

    Take
        base_map: (axis, scale, shift) of the base coord map
        next_map: (axis, scale, shift) of the further coord map

    Give
        (axis, a1 * a2, a1 * b2 + b1) where the axis is whichever of the two
        axes is defined (an axis of None is compatible with any axis)

    Raises AxisMismatchException if both axes are defined but differ.
    """
    # unpack in the body: tuple parameters in the signature are Python 2 only
    ax1, a1, b1 = base_map
    ax2, a2, b2 = next_map
    if ax1 is None:
        ax = ax2
    elif ax2 is None or ax1 == ax2:
        ax = ax1
    else:
        raise AxisMismatchException(
            'cannot compose coord maps on axes {} and {}'.format(ax1, ax2))
    return ax, a1 * a2, a1 * b2 + b1
|
||
|
||
def inverse(mapping):
    """
    Invert a coord map by de-scaling and un-shifting;
    this gives the backward mapping for the gradient.

    Take
        mapping: (axis, scale, shift) coord map

    Give
        (axis, 1 / scale, -shift / scale)
    """
    # unpack in the body: tuple parameters in the signature are Python 2 only
    ax, a, b = mapping
    return ax, 1 / a, -b / a
|
||
|
||
def coord_map_from_to(top_from, top_to):
    """
    Determine the coordinate mapping between a top (from) and a top (to).
    Walk the graph to find a common ancestor while composing the coord maps for
    from and to until they meet. As a last step the from map is inverted.

    Take
        top_from: top to map coordinates from
        top_to: top to map coordinates to

    Give
        (axis, scale, shift) coord map composed of the map of top_to and the
        inverse of the map of top_from at their common ancestor

    Raises RuntimeError if the walks do not meet at a common blob. Exceptions
    other than UndefinedMapException raised while composing (such as
    AxisMismatchException) are not caught here.
    """
    # We need to find a common ancestor of top_from and top_to.
    # We'll assume that all ancestors are equivalent here (otherwise the graph
    # is an inconsistent state (which we could improve this to check for)).
    # For now use a brute-force algorithm.

    def collect_bottoms(top):
        """
        Collect the bottoms to walk for the coordinate mapping.
        The general rule is that all the bottoms of a layer can be mapped, as
        most layers have the same coordinate mapping for each bottom.
        Crop layer is a notable exception. Only the first/cropped bottom is
        mappable; the second/dimensions bottom is excluded from the walk.
        """
        bottoms = top.fn.inputs
        if top.fn.type_name == 'Crop':
            bottoms = bottoms[:1]
        return bottoms

    def walk_back(top, maps, frontier):
        """
        Take one step of the walk: record the coord map of every mappable
        bottom of top and queue those bottoms to be walked in turn.
        Tops of layers without a defined coord map end the walk silently.
        """
        bottoms = collect_bottoms(top)
        if not bottoms:
            return
        try:
            # every bottom of a layer shares the same composed map
            step = compose(maps[top], coord_map(top.fn))
        except UndefinedMapException:
            return
        for bottom in bottoms:
            maps[bottom] = step
            frontier.add(bottom)

    # walk back from top_from, keeping the coord map as we go
    from_maps = {top_from: (None, 1, 0)}
    frontier = {top_from}
    while frontier:
        walk_back(frontier.pop(), from_maps, frontier)

    # now walk back from top_to until we hit a common blob
    to_maps = {top_to: (None, 1, 0)}
    frontier = {top_to}
    while frontier:
        top = frontier.pop()
        if top in from_maps:
            return compose(to_maps[top], inverse(from_maps[top]))
        walk_back(top, to_maps, frontier)

    # if we got here, we did not find a blob in common
    raise RuntimeError('Could not compute map between tops; are they '
                       'connected by spatial layers?')
|
||
|
||
def crop(top_from, top_to):
    """
    Define a Crop layer to crop a top (from) to another top (to) by
    determining the coordinate mapping between the two and net spec'ing
    the axis and shift parameters of the crop.

    Take
        top_from: top to be cropped
        top_to: reference top to crop to

    Give
        the Crop layer with bottoms (top_from, top_to), its crop_param axis
        and offset set from the coordinate mapping

    Raises AssertionError if the scales of the tops differ, or if the offset
    would be negative or noninteger.
    """
    ax, a, b = coord_map_from_to(top_from, top_to)
    # tops related only by pass-through layers map by the scalar identity
    # (None, 1, 0): promote to arrays so that the checks and the offset list
    # below work the same as for array-valued maps
    a, b = np.atleast_1d(a), np.atleast_1d(b)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), 'cannot crop noninteger offset ' \
        '(b = {})'.format(b)
    # +1 for first cropping dim.; with no axis in the mapping fall back to
    # axis 2, the same default that crop_params() assumes for a Crop layer
    axis = 2 if ax is None else ax + 1
    return L.Crop(top_from, top_to,
                  crop_param=dict(axis=axis,
                                  offset=list(-np.round(b).astype(int))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
import unittest | ||
|
||
import numpy as np | ||
import random | ||
|
||
import caffe | ||
from caffe import layers as L | ||
from caffe import params as P | ||
from caffe.coord_map import coord_map_from_to, crop | ||
|
||
|
||
def coord_net_spec(ks=3, stride=1, pad=0, pool=2, dstride=2, dpad=0):
    """
    Define net spec for simple conv-pool-deconv pattern common to all
    coordinate mapping tests.

    Take
        ks, stride, pad: kernel size, stride, and padding of the convolution
        pool: kernel size and stride of the max pooling
        dstride, dpad: stride and padding of the deconvolution, the kernel
            size of which is twice dstride

    Give
        the net spec with tops data, aux, conv, pool, and deconv
    """
    n = caffe.NetSpec()
    n.data = L.Input(shape=dict(dim=[2, 1, 100, 100]))
    n.aux = L.Input(shape=dict(dim=[2, 1, 20, 20]))
    n.conv = L.Convolution(
        n.data, num_output=10, kernel_size=ks, stride=stride, pad=pad)
    n.pool = L.Pooling(
        n.conv, pool=P.Pooling.MAX, kernel_size=pool, stride=pool, pad=0)
    # for upsampling kernel size is 2x stride
    try:
        deconv_ks = [s*2 for s in dstride]
    except TypeError:
        # a scalar stride is not iterable
        deconv_ks = dstride*2
    n.deconv = L.Deconvolution(
        n.pool, num_output=10, kernel_size=deconv_ks, stride=dstride, pad=dpad)
    return n
|
||
|
||
class TestCoordMap(unittest.TestCase):
    """
    Test the coordinate mapping between tops of a net spec and the crop()
    helper that defines Crop layers from it.
    """

    def test_conv_pool_deconv(self):
        """
        Map through conv, pool, and deconv.
        """
        n = coord_net_spec()
        # identity for 2x pool, 2x deconv
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertEqual(a, 1)
        self.assertEqual(b, 0)
        # shift-by-one for 4x pool, 4x deconv
        n = coord_net_spec(pool=4, dstride=4)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertEqual(a, 1)
        self.assertEqual(b, -1)

    def test_pass(self):
        """
        A pass-through layer (ReLU) and conv (1x1, stride 1, pad 0)
        both do identity mapping.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.relu = L.ReLU(n.deconv)
        n.conv1x1 = L.Convolution(
            n.relu, num_output=10, kernel_size=1, stride=1, pad=0)
        for top in [n.relu, n.conv1x1]:
            ax_pass, a_pass, b_pass = coord_map_from_to(top, n.data)
            self.assertEqual(ax, ax_pass)
            self.assertEqual(a, a_pass)
            self.assertEqual(b, b_pass)

    def test_padding(self):
        """
        Padding conv adds offset while padding deconv subtracts offset.
        """
        n = coord_net_spec()
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        pad = random.randint(0, 10)
        # conv padding
        n = coord_net_spec(pad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b - pad, b_pad)
        # deconv padding
        n = coord_net_spec(dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b + pad, b_pad)
        # pad both to cancel out
        n = coord_net_spec(pad=pad, dpad=pad)
        _, a_pad, b_pad = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(a, a_pad)
        self.assertEqual(b, b_pad)

    def test_multi_conv(self):
        """
        Multiple bottoms/tops of a layer are identically mapped.
        """
        n = coord_net_spec()
        # multi bottom/top
        n.conv_data, n.conv_aux = L.Convolution(
            n.data, n.aux, ntop=2, num_output=10, kernel_size=5, stride=2,
            pad=0)
        ax1, a1, b1 = coord_map_from_to(n.conv_data, n.data)
        ax2, a2, b2 = coord_map_from_to(n.conv_aux, n.aux)
        self.assertEqual(ax1, ax2)
        self.assertEqual(a1, a2)
        self.assertEqual(b1, b2)

    def test_rect(self):
        """
        Anisotropic mapping is equivalent to its isotropic parts.
        """
        n3x3 = coord_net_spec(ks=3, stride=1, pad=0)
        n5x5 = coord_net_spec(ks=5, stride=2, pad=10)
        n3x5 = coord_net_spec(ks=[3, 5], stride=[1, 2], pad=[0, 10])
        ax_3x3, a_3x3, b_3x3 = coord_map_from_to(n3x3.deconv, n3x3.data)
        ax_5x5, a_5x5, b_5x5 = coord_map_from_to(n5x5.deconv, n5x5.data)
        ax_3x5, a_3x5, b_3x5 = coord_map_from_to(n3x5.deconv, n3x5.data)
        self.assertTrue(ax_3x3 == ax_5x5 == ax_3x5)
        self.assertEqual(a_3x3, a_3x5[0])
        self.assertEqual(b_3x3, b_3x5[0])
        self.assertEqual(a_5x5, a_3x5[1])
        self.assertEqual(b_5x5, b_3x5[1])

    def test_nd_conv(self):
        """
        ND conv maps the same way in more dimensions.
        """
        n = caffe.NetSpec()
        # define data with 3 spatial dimensions, otherwise the same net
        n.data = L.Input(shape=dict(dim=[2, 3, 100, 100, 100]))
        n.conv = L.Convolution(
            n.data, num_output=10, kernel_size=[3, 3, 3], stride=[1, 1, 1],
            pad=[0, 1, 2])
        n.pool = L.Pooling(
            n.conv, pool=P.Pooling.MAX, kernel_size=2, stride=2, pad=0)
        n.deconv = L.Deconvolution(
            n.pool, num_output=10, kernel_size=4, stride=2, pad=0)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        self.assertEqual(ax, 1)
        self.assertTrue(len(a) == len(b))
        self.assertTrue(np.all(a == 1))
        self.assertEqual(b[0] - 1, b[1])
        self.assertEqual(b[1] - 1, b[2])

    def test_crop_of_crop(self):
        """
        Map coordinates through Crop layer:
        crop an already-cropped output to the input and check change in offset.
        """
        n = coord_net_spec()
        offset = random.randint(0, 10)
        ax, a, b = coord_map_from_to(n.deconv, n.data)
        n.crop = L.Crop(n.deconv, n.data, axis=2, offset=offset)
        ax_crop, a_crop, b_crop = coord_map_from_to(n.crop, n.data)
        self.assertEqual(ax, ax_crop)
        self.assertEqual(a, a_crop)
        self.assertEqual(b + offset, b_crop)

    def test_crop_helper(self):
        """
        Define Crop layer by crop().
        """
        n = coord_net_spec()
        crop(n.deconv, n.data)

    def test_catch_unconnected(self):
        """
        Catch mapping spatially unconnected tops.
        """
        n = coord_net_spec()
        n.ip = L.InnerProduct(n.deconv, num_output=10)
        with self.assertRaises(RuntimeError):
            coord_map_from_to(n.ip, n.data)

    def test_catch_scale_mismatch(self):
        """
        Catch incompatible scales, such as when the top to be cropped
        is mapped to a differently strided reference top.
        """
        n = coord_net_spec(pool=3, dstride=2)  # pool 3x but deconv 2x
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)

    def test_catch_negative_crop(self):
        """
        Catch impossible offsets, such as when the top to be cropped
        is mapped to a larger reference top.
        """
        n = coord_net_spec(dpad=10)  # make output smaller than input
        with self.assertRaises(AssertionError):
            crop(n.deconv, n.data)