From 2837d50c6a8f333789705fb63b891c94a72675f6 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Wed, 15 Mar 2017 22:43:06 -0400 Subject: [PATCH 01/21] densenet dilation for segmentation --- keras_contrib/applications/densenet.py | 58 +++++++++++++++++++++----- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 71963ccb5..c4fd4640e 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -37,7 +37,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, include_top=True, weights='cifar10', input_tensor=None, input_shape=None, - classes=10): + classes=10, dilation_rate=1, pooling="avg"): """Instantiate the DenseNet architecture, optionally loading weights pre-trained on CIFAR-10. Note that when using TensorFlow, @@ -83,6 +83,11 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. + dilation_rate: an integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. Can be a + single integer to specify the same value for all spatial + dimensions. + pooling: Data pooling to reduce resolution, one of "avg", "max", None # Returns A Keras model instance. @@ -113,8 +118,9 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers img_input = input_tensor x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, - growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction, - dropout_rate, weight_decay) + growth_rate, nb_filter, nb_layers_per_block, bottleneck, + reduction, dropout_rate, weight_decay, dilation_rate, + pooling, input_shape) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. @@ -321,7 +327,8 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca return x -def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): +def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, + weight_decay=1E-4, dilation_rate=1, pooling="avg" ): ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D Args: @@ -331,6 +338,9 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight in the transition block. dropout_rate: dropout rate weight_decay: weight decay factor + dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to + use for dilated convolution. Can be a single integer to specify the same value for + all spatial dimensions. 
Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool ''' @@ -341,10 +351,14 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) x = Convolution2D(int(nb_filter * compression), 1, 1, init="he_uniform", border_mode="same", bias=False, - W_regularizer=l2(weight_decay))(x) + W_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) if dropout_rate: x = Dropout(dropout_rate)(x) - x = AveragePooling2D((2, 2), strides=(2, 2))(x) + + if pooling == "avg": + x = AveragePooling2D((2, 2), strides=(2, 2))(x) + elif pooling == "max": + x = MaxPooling2D((2, 2), strides=(2, 2))(x) return x @@ -418,8 +432,11 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, return x -def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, - nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1E-4): +def __create_dense_net(nb_classes, img_input, include_top, depth=40, + nb_dense_block=3, growth_rate=12, nb_filter=-1, + nb_layers_per_block=-1, bottleneck=False, reduction=0.0, + dropout_rate=None, weight_decay=1E-4, dilation_rate=1, + pooling="avg", input_shape=None): ''' Build the DenseNet model Args: @@ -440,6 +457,12 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression dropout_rate: dropout rate weight_decay: weight decay + dilation_rate: an integer or tuple/list of 2 integers, specifying + the dilation rate to use for dilated convolution. Can be a + single integer to specify the same value for all spatial + dimensions. + pooling: Data pooling to reduce resolution, one of "avg", "max", None + input_shape: Only used for shape inference in fully convolutional networks. 
Returns: keras tensor with nb_layers of conv_block appended ''' @@ -487,7 +510,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl dropout_rate=dropout_rate, weight_decay=weight_decay) # add transition_block x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, - weight_decay=weight_decay) + weight_decay=weight_decay, dilation_rate=dilation_rate, pooling=pooling) nb_filter = int(nb_filter * compression) # The last dense_block does not have a transition_block @@ -497,10 +520,23 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) - x = GlobalAveragePooling2D()(x) + if pooling is not None: + x = GlobalAveragePooling2D()(x) - if include_top: + if include_top and pooling is not None: x = Dense(nb_classes, activation='softmax', W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x) + elif include_top and pooling is None: + x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), + bias=False)(x) + + if K.image_dim_ordering() == 'th': + channel, row, col = input_shape + else: + row, col, channel = input_shape + + x = Reshape((row * col, nb_classes))(x) + x = Activation('softmax')(x) + x = Reshape((row, col, nb_classes))(x) return x From dc3b61a5b4c7b82fc91a734830daca703073df5c Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 09:20:42 -0400 Subject: [PATCH 02/21] Keras-2 dim_ordering to data_format --- keras_contrib/applications/densenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index c4fd4640e..41a360705 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -106,7 +106,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=8, - dim_ordering=K.image_dim_ordering(), + data_format=K.image_data_format(), include_top=include_top) if input_tensor is None: From b8ef434944cefc0b64f197e471f9e6b3d27ac790 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:03:15 -0400 Subject: [PATCH 03/21] densenet.py include_top adapted for segmentation --- keras_contrib/applications/densenet.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 41a360705..e3fb1b3c6 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -107,7 +107,9 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers default_size=32, min_size=8, data_format=K.image_data_format(), - include_top=include_top) + # If doing segmentation we still include top + # but _obtain_input_shape only supports labeling. + include_top=(include_top and dilation_rate is not 1) if input_tensor is None: img_input = Input(shape=input_shape) @@ -137,7 +139,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4): # Default parameters match. 
Weights for this model exist: - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': if include_top: weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5', TH_WEIGHTS_PATH, @@ -259,8 +261,10 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=16, - dim_ordering=K.image_dim_ordering(), - include_top=include_top) + data_format=K.image_data_format(), + # If doing segmentation we still include top + # but _obtain_input_shape only supports labeling. + include_top=(include_top and dilation_rate is not 1)) if input_tensor is None: img_input = Input(shape=input_shape) @@ -529,7 +533,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), bias=False)(x) - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': channel, row, col = input_shape else: row, col, channel = input_shape @@ -636,7 +640,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, skip_list = skip_list[::-1] # reverse the skip list - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': out_shape = [batchsize, nb_filter, rows // 16, cols // 16] else: out_shape = [batchsize, rows // 16, cols // 16, nb_filter] @@ -645,7 +649,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, for block_idx in range(nb_dense_block): n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx] - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': out_shape[1] = n_filters_keep else: out_shape[3] = n_filters_keep @@ -659,7 +663,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # concatenate the skip connection with the transition block x = merge([t, skip_list[block_idx]], mode='concat', concat_axis=concat_axis) - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': out_shape[2] *= 2 out_shape[3] *= 2 else: @@ -676,7 +680,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), bias=False)(x) - if K.image_dim_ordering() == 'th': + if K.data_format() == 'channels_first': channel, row, col = input_shape else: row, col, channel = input_shape From 08cdc73b39554a87535f5ade25dc4c80ecaf73d4 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:07:43 -0400 Subject: [PATCH 04/21] densenet.py typo fix --- keras_contrib/applications/densenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index e3fb1b3c6..751f8fa38 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -109,7 +109,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers data_format=K.image_data_format(), # If doing segmentation we still include top # but _obtain_input_shape only supports labeling. 
- include_top=(include_top and dilation_rate is not 1) + include_top=(include_top and dilation_rate is not 1)) if input_tensor is None: img_input = Input(shape=input_shape) From 94826fdf430669392de9d9d58475703e9da2c407 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:11:07 -0400 Subject: [PATCH 05/21] densenet.py typo --- keras_contrib/applications/densenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 751f8fa38..c6e9df042 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -109,7 +109,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers data_format=K.image_data_format(), # If doing segmentation we still include top # but _obtain_input_shape only supports labeling. - include_top=(include_top and dilation_rate is not 1)) + include_top=(include_top and dilation_rate is 1)) if input_tensor is None: img_input = Input(shape=input_shape) @@ -264,7 +264,7 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo data_format=K.image_data_format(), # If doing segmentation we still include top # but _obtain_input_shape only supports labeling. - include_top=(include_top and dilation_rate is not 1)) + include_top=(include_top and dilation_rate is 1)) if input_tensor is None: img_input = Input(shape=input_shape) From eddb0b966d5fdbbf0ba0d726402a3c075cdb946f Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:19:09 -0400 Subject: [PATCH 06/21] densenet.py fix length bug --- keras_contrib/applications/densenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index c6e9df042..ded40d99f 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -473,7 +473,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, concat_axis = 1 if K.image_dim_ordering() == "th" else -1 - assert (depth - 4) % 3 == 0, "Depth must be 3 N + 4" + assert (depth - 4) % nb_dense_block == 0, "Depth must be nb_dense_block * N + 4" if reduction != 0.0: assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0" @@ -487,7 +487,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_layers = nb_layers[:-1] else: if nb_layers_per_block == -1: - count = int((depth - 4) / 3) + count = int((depth - 4) / nb_dense_block) nb_layers = [count for _ in range(nb_dense_block)] final_nb_layer = count else: From d0cb8591b382e8e5e92f0c6dac030d22b2505f1f Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:37:20 -0400 Subject: [PATCH 07/21] fix layer count --- keras_contrib/applications/densenet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index ded40d99f..1415e9293 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -473,7 +473,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, concat_axis = 1 if K.image_dim_ordering() == "th" else -1 - assert (depth - 4) % nb_dense_block == 0, "Depth must be nb_dense_block * N + 4" + assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4" if reduction != 0.0: assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 
1.0" @@ -487,7 +487,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_layers = nb_layers[:-1] else: if nb_layers_per_block == -1: - count = int((depth - 4) / nb_dense_block) + count = int((depth - 4) / 3) nb_layers = [count for _ in range(nb_dense_block)] final_nb_layer = count else: From 4f293248305a4ff703908acc038a794f461f4437 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 10:47:58 -0400 Subject: [PATCH 08/21] densenet.py typo fix --- keras_contrib/applications/densenet.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 1415e9293..d0143b56e 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -139,7 +139,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4): # Default parameters match. Weights for this model exist: - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': if include_top: weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5', TH_WEIGHTS_PATH, @@ -533,7 +533,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), bias=False)(x) - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': channel, row, col = input_shape else: row, col, channel = input_shape @@ -640,7 +640,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, skip_list = skip_list[::-1] # reverse the skip list - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': out_shape = [batchsize, nb_filter, rows // 16, cols // 16] else: out_shape = [batchsize, rows // 16, cols // 16, nb_filter] @@ -649,7 +649,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, for block_idx in range(nb_dense_block): n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx] - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': out_shape[1] = n_filters_keep else: out_shape[3] = n_filters_keep @@ -663,7 +663,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # concatenate the skip connection with the transition block x = merge([t, skip_list[block_idx]], mode='concat', concat_axis=concat_axis) - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': out_shape[2] *= 2 out_shape[3] *= 2 else: @@ -680,7 +680,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), bias=False)(x) - if K.data_format() == 'channels_first': + if K.image_data_format() == 'channels_first': channel, row, col = input_shape else: row, col, channel = input_shape From bf95474bdd48a85df1c443db0fa108bf28dcf86a Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 18:17:46 -0400 Subject: [PATCH 09/21] densenet.py Keras-2 support except for concatenate due to keras issue 5972 https://github.com/fchollet/keras/issues/5972 --- keras_contrib/applications/densenet.py | 109 ++++++++++++++----------- 1 file changed, 63 insertions(+), 46 deletions(-) diff --git 
a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index d0143b56e..750095b3c 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -14,10 +14,11 @@ from keras.models import Model from keras.layers.core import Dense, Dropout, Activation, Reshape -from keras.layers.convolutional import Convolution2D, Deconvolution2D, AtrousConvolution2D, UpSampling2D +from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D from keras.layers.pooling import AveragePooling2D from keras.layers.pooling import GlobalAveragePooling2D from keras.layers import Input, merge +from keras.layers.merge import concatenate from keras.layers.normalization import BatchNormalization from keras.regularizers import l2 from keras.utils.layer_utils import convert_all_kernels_in_model @@ -107,8 +108,9 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers default_size=32, min_size=8, data_format=K.image_data_format(), - # If doing segmentation we still include top - # but _obtain_input_shape only supports labeling. + # If doing segmentation we still include + # top but _obtain_input_shape only + # supports labeling. include_top=(include_top and dilation_rate is 1)) if input_tensor is None: @@ -182,7 +184,8 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_block=4, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, init_conv_filters=48, include_top=True, weights=None, input_tensor=None, classes=1, - upsampling_conv=128, upsampling_type='upsampling', batchsize=None): + upsampling_conv=128, upsampling_type='upsampling', batchsize=None, + dilation_rate=1): """Instantiate the DenseNet FCN architecture. Note that when using TensorFlow, for best performance you should set @@ -246,7 +249,8 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo 'batch size must be provided in batchsize parameter.') if input_shape is None: - raise ValueError('For fully convolutional models, input shape must be supplied.') + raise ValueError( + 'For fully convolutional models, input shape must be supplied.') if type(nb_layers_per_block) is not list and nb_dense_block < 1: raise ValueError('Number of dense layers per block must be greater than 1. Argument ' @@ -263,8 +267,9 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo min_size=16, data_format=K.image_data_format(), # If doing segmentation we still include top - # but _obtain_input_shape only supports labeling. - include_top=(include_top and dilation_rate is 1)) + # but _obtain_input_shape only supports + # labeling, not segmentation networks. 
+ include_top=False) if input_tensor is None: img_input = Input(shape=input_shape) @@ -304,17 +309,19 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) ''' - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 + concat_axis = 1 if K.image_data_format() == "channels_first" else -1 - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) if bottleneck: - inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua + # Obtained from + # https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua + inter_channel = nb_filter * 4 - x = Convolution2D(inter_channel, 1, 1, init='he_uniform', border_mode='same', bias=False, - W_regularizer=l2(weight_decay))(x) + x = Convolution2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, + kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -323,8 +330,8 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) - x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", bias=False, - W_regularizer=l2(weight_decay))(x) + x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", use_bias=False, + kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -332,7 +339,7 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, - weight_decay=1E-4, dilation_rate=1, pooling="avg" ): + weight_decay=1E-4, dilation_rate=1, pooling="avg"): ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D Args: @@ -349,13 +356,13 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool ''' - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) - x = Convolution2D(int(nb_filter * compression), 1, 1, init="he_uniform", border_mode="same", bias=False, - W_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) + x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer="he_uniform", padding="same", use_bias=False, + kernel_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -385,15 +392,17 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou Returns: keras tensor with nb_layers of conv_block appended ''' - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 x_list = [x] for i in range(nb_layers): - x = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) + x = __conv_block(x, growth_rate, bottleneck, + dropout_rate, weight_decay) x_list.append(x) x = merge(x_list, mode='concat', 
concat_axis=concat_axis) + # x = concatenate(x_list, concat_axis) if grow_nb_filters: nb_filter += growth_rate @@ -420,18 +429,18 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, if type == 'upsampling': x = UpSampling2D()(ip) elif type == 'subpixel': - x = Convolution2D(nb_filters, 3, 3, activation="relu", border_mode='same', W_regularizer=l2(weight_decay), - bias=False, init='he_uniform')(ip) + x = Conv2D(nb_filters, 3, 3, activation="relu", padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_uniform')(ip) x = SubPixelUpscaling(scale_factor=2)(x) - x = Convolution2D(nb_filters, 3, 3, activation="relu", border_mode='same', W_regularizer=l2(weight_decay), - bias=False, init='he_uniform')(x) + x = Conv2D(nb_filters, 3, 3, activation="relu", padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_uniform')(x) elif type == 'atrous': # waiting on https://github.com/fchollet/keras/issues/4018 - x = AtrousConvolution2D(nb_filters, 3, 3, activation="relu", W_regularizer=l2(weight_decay), - bias=False, atrous_rate=(2, 2), init='he_uniform')(ip) + x = Conv2D(nb_filters, 3, 3, activation="relu", kernel_regularizer=l2(weight_decay), + use_bias=False, atrous_rate=(2, 2), kernel_initializer='he_uniform')(ip) else: - x = Deconvolution2D(nb_filters, 3, 3, output_shape, activation='relu', border_mode='same', - subsample=(2, 2), init='he_uniform')(ip) + x = Conv2DTranspose(nb_filters, 3, 3, output_shape, activation='relu', padding='same', + subsample=(2, 2), kernel_initializer='he_uniform')(ip) return x @@ -471,7 +480,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, Returns: keras tensor with nb_layers of conv_block appended ''' - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4" if reduction != 0.0: @@ -505,8 +514,8 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, compression = 1.0 - reduction # Initial convolution - x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False, - W_regularizer=l2(weight_decay))(img_input) + x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False, + kernel_regularizer=l2(weight_decay))(img_input) # Add dense blocks for block_idx in range(nb_dense_block - 1): @@ -521,17 +530,18 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, dropout_rate=dropout_rate, weight_decay=weight_decay) - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) if pooling is not None: x = GlobalAveragePooling2D()(x) if include_top and pooling is not None: - x = Dense(nb_classes, activation='softmax', W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x) + x = Dense(nb_classes, activation='softmax', kernel_regularizer=l2( + weight_decay), bias_regularizer=l2(weight_decay))(x) elif include_top and pooling is None: - x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), - bias=False)(x) + x = Conv2D(nb_classes, (1, 1), activation='linear', 
padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False)(x) if K.image_data_format() == 'channels_first': channel, row, col = input_shape @@ -577,7 +587,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, Returns: keras tensor with nb_layers of conv_block appended ''' - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 + concat_axis = 1 if K.image_data_format() == "channels_first" else -1 if concat_axis == 1: # th dim ordering _, rows, cols = input_shape @@ -588,7 +598,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0" # check if upsampling_conv has minimum number of filters - # minimum is set to 12, as at least 3 color channels are needed for correct upsampling + # minimum is set to 12, as at least 3 color channels are needed for + # correct upsampling assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, "Parameter `upsampling_conv` number of channels must " \ "be a positive number divisible by 4 and greater " \ "than 12" @@ -611,8 +622,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, compression = 1.0 - reduction # Initial convolution - x = Convolution2D(init_conv_filters, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False, - W_regularizer=l2(weight_decay))(img_input) + x = Conv2D(init_conv_filters, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False, + kernel_regularizer=l2(weight_decay))(img_input) nb_filter = init_conv_filters @@ -630,10 +641,12 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay) - nb_filter = int(nb_filter * compression) # this is calculated inside transition_down_block + # this is calculated inside transition_down_block + nb_filter = int(nb_filter * compression) # The last dense_block does not have a transition_down_block - # return the concatenated feature maps without the concatenation of the input + # return the concatenated feature maps without the concatenation of the + # input _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay, return_concat_list=True) @@ -655,13 +668,17 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, out_shape[3] = n_filters_keep # upsampling block must upsample only the feature maps (concat_list[1:]), - # not the concatenation of the input with the feature maps (concat_list[0]. + # not the concatenation of the input with the feature maps + # (concat_list[0]. 
l = merge(concat_list[1:], mode='concat', concat_axis=concat_axis) + # l = concatenate(concat_list[1:], axis=concat_axis) - t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, output_shape=out_shape) + t = __transition_up_block( + l, nb_filters=n_filters_keep, type=upsampling_type, output_shape=out_shape) # concatenate the skip connection with the transition block x = merge([t, skip_list[block_idx]], mode='concat', concat_axis=concat_axis) + # x = concatenate([t, skip_list[block_idx]], axis=concat_axis) if K.image_data_format() == 'channels_first': out_shape[2] *= 2 @@ -677,8 +694,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, return_concat_list=True, grow_nb_filters=False) if include_top: - x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay), - bias=False)(x) + x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False)(x) if K.image_data_format() == 'channels_first': channel, row, col = input_shape From 8c1d1ed3d9407badacad722af85b7ed107c80323 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Fri, 24 Mar 2017 18:26:11 -0400 Subject: [PATCH 10/21] densenet.py Convolution2D to Conv2D --- keras_contrib/applications/densenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 750095b3c..af9177d35 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -320,7 +320,7 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca # https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua inter_channel = nb_filter * 4 - x = Convolution2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, + x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) if dropout_rate: From 6b873669d6d34e975a294c15078f1e92a53a0288 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 28 Mar 2017 17:06:02 -0400 Subject: [PATCH 11/21] cifar10_densenet.py typo fix --- examples/cifar10_densenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cifar10_densenet.py b/examples/cifar10_densenet.py index c4959193c..8fc068718 100644 --- a/examples/cifar10_densenet.py +++ b/examples/cifar10_densenet.py @@ -26,7 +26,7 @@ img_channels = 3 # Parameters for the DenseNet model builder -img_dim = (img_channels, img_rows, img_cols) if K.imgae_data_format() == "channels_first" else (img_rows, img_cols, img_channels) +img_dim = (img_channels, img_rows, img_cols) if K.image_data_format() == "channels_first" else (img_rows, img_cols, img_channels) depth = 40 nb_dense_block = 3 growth_rate = 12 From c1af01ff774e55d03db4f3a59b25b1cd43f65789 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 28 Mar 2017 17:27:49 -0400 Subject: [PATCH 12/21] densenet.py depth can be None --- keras_contrib/applications/densenet.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index af9177d35..257ea3e70 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -52,7 +52,8 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers specified in your Keras config file. 
# Arguments - depth: number or layers in the DenseNet + depth: Number of layers in the DenseNet. May be None if + nb_dense_block and nb_layers_per_block are set. nb_dense_block: number of dense blocks to add to end (generally = 3) growth_rate: number of filters to add per dense block nb_filter: initial number of filters. -1 indicates initial @@ -482,7 +483,8 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4" + if depth is not None: + assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4" if reduction != 0.0: assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0" From fe5210e763b948811f74480e84da74d0eff0d5a3 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 28 Mar 2017 17:44:06 -0400 Subject: [PATCH 13/21] densenet.py minor cleanup --- keras_contrib/applications/densenet.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 257ea3e70..ca52e6f1c 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -60,7 +60,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers number of filters is 2 * growth_rate nb_layers_per_block: number of layers in each dense block. Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the depth of the network. + If -1, calculates nb_layer_per_block from the network depth. If positive integer, a set number of layers per dense block. If list, nb_layer is used as provided. Note that list size must be (nb_dense_block + 1) @@ -322,7 +322,7 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca inter_channel = nb_filter * 4 x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) + kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -363,7 +363,7 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer="he_uniform", padding="same", use_bias=False, - kernel_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) + kernel_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -431,10 +431,10 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, x = UpSampling2D()(ip) elif type == 'subpixel': x = Conv2D(nb_filters, 3, 3, activation="relu", padding='same', kernel_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(ip) + use_bias=False, kernel_initializer='he_uniform')(ip) x = SubPixelUpscaling(scale_factor=2)(x) x = Conv2D(nb_filters, 3, 3, activation="relu", padding='same', kernel_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(x) + use_bias=False, kernel_initializer='he_uniform')(x) elif type == 'atrous': # waiting on https://github.com/fchollet/keras/issues/4018 x = Conv2D(nb_filters, 3, 3, activation="relu", kernel_regularizer=l2(weight_decay), @@ -516,7 +516,8 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, compression = 1.0 - reduction # Initial convolution - x = 
Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False, + x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", + padding="same", name="initial_conv2D", use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) # Add dense blocks @@ -679,7 +680,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, l, nb_filters=n_filters_keep, type=upsampling_type, output_shape=out_shape) # concatenate the skip connection with the transition block - x = merge([t, skip_list[block_idx]], mode='concat', concat_axis=concat_axis) + x = merge([t, skip_list[block_idx]], + mode='concat', concat_axis=concat_axis) # x = concatenate([t, skip_list[block_idx]], axis=concat_axis) if K.image_data_format() == 'channels_first': From 9976e03c3f431da4fa8441997e844cdc6836fdef Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 28 Mar 2017 18:42:35 -0400 Subject: [PATCH 14/21] densenet_test.py added --- .../applications/densenet_test.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/keras_contrib/applications/densenet_test.py diff --git a/tests/keras_contrib/applications/densenet_test.py b/tests/keras_contrib/applications/densenet_test.py new file mode 100644 index 000000000..c22ebca4b --- /dev/null +++ b/tests/keras_contrib/applications/densenet_test.py @@ -0,0 +1,35 @@ +import pytest +import numpy as np +from numpy.testing import assert_allclose + +from keras import backend as K +from keras.optimizers import Adam +from keras_contrib.applications.densenet import DenseNet +from keras_contrib.applications.densenet import DenseNetFCN + + +def test_densenet(): + '''Tests if DenseNet Models can be created correctly + ''' + densenet = DenseNet() + optimizer = Adam(lr=1e-3) + densenet.compile(loss='categorical_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + + fcn_densenet = DenseNetFCN((32, 32, 3)) + fcn_densenet.compile(loss='categorical_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + + atrous_densenet = DenseNet(depth=None, nb_dense_block=4, growth_rate=12, + nb_filter=16, nb_layers_per_block=4, + weights=None, dilation_rate=2) + + atrous_densenet.compile(loss='categorical_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + + +if __name__ == '__main__': + pytest.main([__file__]) From 9665d721cf38e6f01b3cb4cf7bc888f14230d9f4 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Tue, 28 Mar 2017 18:49:52 -0400 Subject: [PATCH 15/21] densenet.py 'atrous' now works as 'dilation_rate' --- keras_contrib/applications/densenet.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index 42aec5224..a97f03fea 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -223,7 +223,7 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo into, only to be specified if `include_top` is True, and if no `weights` argument is specified. upsampling_conv: number of convolutional layers in upsampling via subpixel convolution - upsampling_type: Can be one of 'upsampling', 'deconv', 'atrous' and + upsampling_type: Can be one of 'upsampling', 'deconv', and 'subpixel'. Defines type of upsampling algorithm used. batchsize: Fixed batch size. This is a temporary requirement for computation of output shape in the case of Deconvolution2D layers. 
@@ -241,9 +241,9 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo upsampling_type = upsampling_type.lower() - if upsampling_type not in ['upsampling', 'deconv', 'atrous', 'subpixel']: - raise ValueError('Parameter "upsampling_type" must be one of "upsampling", ' - '"deconv", "atrous" or "subpixel".') + if upsampling_type not in ['upsampling', 'deconv', 'subpixel']: + raise ValueError('Parameter "upsampling_type" must be one of ' + '"upsampling", "deconv", or "subpixel".') if upsampling_type == 'deconv' and batchsize is None: raise ValueError('If "upsampling_type" is deconvoloution, then a fixed ' @@ -257,11 +257,6 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo raise ValueError('Number of dense layers per block must be greater than 1. Argument ' 'value was %d.' % (nb_layers_per_block)) - if upsampling_type == 'atrous': - warnings.warn('Atrous Convolution upsampling does not correctly work (see https://github.com/fchollet/keras/issues/4018).\n' - 'Switching to `upsampling` type upscaling.') - upsampling_type = 'upsampling' - # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=32, @@ -420,7 +415,7 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, Args: ip: keras tensor nb_filters: number of layers - type: can be 'upsampling', 'subpixel', 'deconv', or 'atrous'. Determines type of upsampling performed + type: can be 'upsampling', 'subpixel', or 'deconv'. Determines type of upsampling performed output_shape: required if type = 'deconv'. Output shape of tensor weight_decay: weight decay factor @@ -435,10 +430,6 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, x = SubPixelUpscaling(scale_factor=2)(x) x = Conv2D(nb_filters, (3, 3), activation="relu", padding='same', kernel_regularizer=l2(weight_decay), use_bias=False, kernel_initializer='he_uniform')(x) - elif type == 'atrous': - # waiting on https://github.com/fchollet/keras/issues/4018 - x = Conv2D(nb_filters, (3, 3), activation="relu", kernel_regularizer=l2(weight_decay), - use_bias=False, atrous_rate=(2, 2), kernel_initializer='he_uniform')(ip) else: x = Conv2DTranspose(nb_filters, (3, 3), output_shape, activation='relu', padding='same', subsample=(2, 2), kernel_initializer='he_uniform')(ip) @@ -579,7 +570,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, If list, nb_layer is used as provided. Note that list size must be (nb_dense_block + 1) nb_upsampling_conv: number of convolutional layers in upsampling via subpixel convolution - upsampling_type: Can be one of 'upsampling', 'deconv', 'atrous' and + upsampling_type: Can be one of 'upsampling', 'deconv', and 'subpixel'. Defines type of upsampling algorithm used. batchsize: Fixed batch size. This is a temporary requirement for computation of output shape in the case of Deconvolution2D layers. 
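Patch 15 above can drop the dedicated 'atrous' upsampling branch because Keras 2 folds atrous convolution into the ordinary Conv2D layer via its dilation_rate argument. A minimal sketch of that equivalence, assuming the standard Keras 2 API; the filter count and input shape are illustrative, not taken from the patch:

    from keras.layers import Conv2D, Input
    from keras.models import Model

    # Keras 1 spelled this AtrousConvolution2D(64, 3, 3, atrous_rate=(2, 2)).
    # In Keras 2 the same operation is a plain Conv2D with a dilation rate:
    # a 3x3 kernel with dilation_rate=(2, 2) samples a 5x5 neighborhood,
    # growing the receptive field without pooling away spatial resolution.
    inputs = Input(shape=(224, 224, 3))
    x = Conv2D(64, (3, 3), dilation_rate=(2, 2), padding='same')(inputs)
    model = Model(inputs, x)

This is the same mechanism the transition blocks in this series use to keep full resolution for segmentation.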
From db6cd5de0b93b28fca05fd26e4fe2d2b36b00cad Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Wed, 29 Mar 2017 00:04:40 -0400 Subject: [PATCH 16/21] densenet.py reparameterized to better control top and transition layers --- keras_contrib/applications/densenet.py | 279 +++++++++++++++++-------- 1 file changed, 196 insertions(+), 83 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index a97f03fea..f8907541e 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -35,10 +35,16 @@ TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering-no-top.h5' -def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers_per_block=-1, - bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, - include_top=True, weights='cifar10', input_tensor=None, input_shape=None, - classes=10, dilation_rate=1, pooling="avg"): +def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, + nb_filter=16, + nb_layers_per_block=-1, bottleneck=False, reduction=0.0, + dropout_rate=0.0, weight_decay=1E-4, include_top=True, + top='classification', + weights='cifar10', input_tensor=None, + classes=10, transition_dilation_rate=1, + transition_pooling="avg", + transition_kernel_size=(1, 1), + activation='softmax'): """Instantiate the DenseNet architecture, optionally loading weights pre-trained on CIFAR-10. Note that when using TensorFlow, @@ -51,7 +57,20 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers convention used by the model is the one specified in your Keras config file. + For segmentation problems specify `transition_dilation_rate >= 2`, + `transition_pooling=None`, `weights=None`, `top='segmentation'`. + Good options also include `nb_dense_block=4`, `nb_layers_per_block=4`, + and `depth=None`, but this varies by application. + # Arguments + + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(32, 32, 3)` (with `tf` dim ordering) + or `(3, 32, 32)` (with `th` dim ordering). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 8. + E.g. `(200, 200, 3)` would be one valid value. depth: Number of layers in the DenseNet. May be None if nb_dense_block and nb_layers_per_block are set. nb_dense_block: number of dense blocks to add to end (generally = 3) @@ -71,25 +90,33 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers weight_decay: weight decay factor include_top: whether to include the fully-connected layer at the top of the network. + top: One of 'segmentation', 'classification', or None. + 'classification' includes global average pooling and + a dense activation layer with a single output and multiple + classes. 'segmentation' includes a Conv2D and + a softmax activation. None is the same as `include_top=False`. weights: one of `None` (random initialization) or "cifar10" (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `tf` dim ordering) - or `(3, 32, 32)` (with `th` dim ordering). 
- It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. - dilation_rate: an integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. Can be a - single integer to specify the same value for all spatial - dimensions. - pooling: Data pooling to reduce resolution, one of "avg", "max", None + transition_dilation_rate: An integer or tuple/list of 2 integers, + specifying the dilation rate to use in transition blocks for + dilated convolution, increasing the receptive field of the + algorithm. Can be a single integer to specify the same value + for all spatial dimensions. + transition_pooling: Data pooling to reduce resolution in transition + blocks, one of "avg", "max", or None. + transition_kernel_size: Adjusts the filter size of the Conv2D in + each transition block, useful in segmentation for controlling + the receptive field, particularly when combined with + transition_dilation_rate. + activation: Type of activation at the top layer. Can be one of + 'softmax' or 'sigmoid'. Note that if sigmoid is used, + classes must be 1. # Returns A Keras model instance. @@ -104,15 +131,19 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers raise ValueError('If using `weights` as CIFAR 10 with `include_top`' ' as true, `classes` should be 10') + if activation not in ['softmax', 'sigmoid']: + raise ValueError('activation must be one of "softmax" or "sigmoid"') + if activation == 'sigmoid' and classes != 1: + raise ValueError('sigmoid activation can only be used when classes = 1') # Determine proper input shape + # If doing segmentation we still include + # top but _obtain_input_shape only + # supports labeling. input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=8, data_format=K.image_data_format(), - # If doing segmentation we still include - # top but _obtain_input_shape only - # supports labeling. - include_top=(include_top and dilation_rate is 1)) + include_top=(include_top and transition_dilation_rate is 1)) if input_tensor is None: img_input = Input(shape=input_shape) @@ -122,10 +153,11 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers else: img_input = input_tensor - x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, + x = __create_dense_net(classes, img_input, include_top, top, depth, nb_dense_block, growth_rate, nb_filter, nb_layers_per_block, bottleneck, - reduction, dropout_rate, weight_decay, dilation_rate, - pooling, input_shape) + reduction, dropout_rate, weight_decay, transition_dilation_rate, + transition_pooling, transition_kernel_size, input_shape, + activation) # Ensure that the model takes into account # any potential predecessors of `input_tensor`.
@@ -182,11 +214,17 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers return model -def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_block=4, - reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, init_conv_filters=48, - include_top=True, weights=None, input_tensor=None, classes=1, - upsampling_conv=128, upsampling_type='upsampling', batchsize=None, - dilation_rate=1): +def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, + nb_layers_per_block=4, reduction=0.0, dropout_rate=0.0, + weight_decay=1E-4, init_conv_filters=48, + include_top=True, top='segmentation', + weights=None, input_tensor=None, classes=1, + activation='softmax', + upsampling_conv=128, upsampling_type='upsampling', + batchsize=None, + transition_dilation_rate=1, + transition_pooling="avg", + transition_kernel_size=(1, 1)): """Instantiate the DenseNet FCN architecture. Note that when using TensorFlow, for best performance you should set `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. # Arguments - nb_dense_block: number of dense blocks to add to end (generally = 3) + nb_dense_block: number of dense blocks to add to end (generally = 5) growth_rate: number of filters to add per dense block nb_layers_per_block: number of layers in each dense block. Can be a positive integer or a list. If positive integer, a set number of layers per dense block. If list, nb_layer is used as provided. Note that list size must be (nb_dense_block + 1) - reduction: reduction factor of transition blocks. + reduction: reduction factor of transition blocks with + 0 <= reduction < 1. Note : reduction value is inverted to compute compression. dropout_rate: dropout rate weight_decay: weight decay factor init_conv_filters: number of layers in the initial convolution layer include_top: whether to include the fully-connected layer at the top of the network. + top: One of 'segmentation', 'classification', or None. weights: one of `None` (random initialization) or 'cifar10' (pre-training on CIFAR-10).. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. + activation: Type of activation at the top layer. Can be one of + 'softmax' or 'sigmoid'. Note that if sigmoid is used, + classes must be 1. upsampling_conv: number of convolutional layers in upsampling via subpixel convolution upsampling_type: Can be one of 'upsampling', 'deconv', and 'subpixel'. Defines type of upsampling algorithm used. batchsize: Fixed batch size. This is a temporary requirement for computation of output shape in the case of Deconvolution2D layers. Parameter will be removed in next iteration of Keras, which infers output shape of deconvolution layers automatically. + transition_dilation_rate: An integer or tuple/list of 2 integers, + specifying the dilation rate to use in transition blocks for + dilated convolution, increasing the receptive field of the + algorithm. Can be a single integer to specify the same value + for all spatial dimensions. + transition_pooling: Data pooling to reduce resolution in transition + blocks, one of "avg", "max", or None. + transition_kernel_size: Adjusts the filter size of the Conv2D in + each transition block, useful in segmentation for controlling + the receptive field, particularly when combined with + transition_dilation_rate. # Returns A Keras model instance. ... raise ValueError('Number of dense layers per block must be greater than 1. Argument ' 'value was %d.'
% (nb_layers_per_block)) + if activation not in ['softmax', 'sigmoid']: + raise ValueError('activation must be one of "softmax" or "sigmoid"') + + if activation == 'sigmoid' and classes != 1: + raise ValueError('sigmoid activation can only be used when classes = 1') + # Determine proper input shape + # If doing segmentation we still include top + # but _obtain_input_shape only supports + # labeling, not segmentation networks. input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=16, data_format=K.image_data_format(), - # If doing segmentation we still include top - # but _obtain_input_shape only supports - # labeling, not segmentation networks. include_top=False) if input_tensor is None: @@ -278,7 +337,9 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo x = __create_fcn_dense_net(classes, img_input, include_top, nb_dense_block, growth_rate, reduction, dropout_rate, weight_decay, nb_layers_per_block, upsampling_conv, upsampling_type, - batchsize, init_conv_filters, input_shape) + batchsize, init_conv_filters, input_shape, transition_dilation_rate, + transition_pooling, transition_kernel_size, + activation, input_shape) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. @@ -316,17 +377,20 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca # https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua inter_channel = nb_filter * 4 - x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, + x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', + padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(mode=0, axis=concat_axis, + gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) - x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", use_bias=False, + x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", + padding="same", use_bias=False, kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -335,30 +399,39 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, - weight_decay=1E-4, dilation_rate=1, pooling="avg"): + weight_decay=1E-4, dilation_rate=1, pooling="avg", + kernel_size=(1, 1)): ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D Args: ip: keras tensor nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps - in the transition block. + compression: calculated as 1 - reduction. Reduces the number of + feature maps in the transition block. dropout_rate: dropout rate weight_decay: weight decay factor - dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to - use for dilated convolution. Can be a single integer to specify the same value for - all spatial dimensions. + dilation_rate: an integer or tuple/list of 2 integers, specifying the + dilation rate to use for dilated, or atrous convolution. + Can be a single integer to specify the same value for all + spatial dimensions. + pooling: Data pooling to reduce resolution, + one of "avg", "max", or None. 
- Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool + Returns: + + keras tensor, after applying batch_norm, relu-conv, dropout, maxpool ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, + gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) - x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer="he_uniform", padding="same", use_bias=False, - kernel_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) + x = Conv2D(int(nb_filter * compression), kernel_size, + kernel_initializer="he_uniform", padding="same", use_bias=False, + kernel_regularizer=l2(weight_decay), + dilation_rate=dilation_rate)(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -437,37 +510,52 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, return x -def __create_dense_net(nb_classes, img_input, include_top, depth=40, +def __create_dense_net(nb_classes, img_input, include_top=True, + top='classification', depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, nb_layers_per_block=-1, bottleneck=False, reduction=0.0, - dropout_rate=None, weight_decay=1E-4, dilation_rate=1, - pooling="avg", input_shape=None): + dropout_rate=None, weight_decay=1E-4, + transition_dilation_rate=1, transition_pooling="avg", + transition_kernel_size=(1, 1), input_shape=None, + activation='softmax'): ''' Build the DenseNet model Args: nb_classes: number of classes - img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) + img_input: tuple of shape (channels, rows, columns) or + (rows, columns, channels) include_top: flag to include the final Dense layer depth: number or layers nb_dense_block: number of dense blocks to add to end (generally = 3) growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate + nb_filter: initial number of filters. Default -1 indicates + initial number of filters is 2 * growth_rate. nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the depth of the network. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) + Can be a -1, positive integer or a list. + If -1, calculates nb_layer_per_block from the depth of the network. + If positive integer, a set number of layers per dense block. + If list, nb_layer is used as provided. Note that list size must + be (nb_dense_block + 1) bottleneck: add bottleneck blocks - reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression + reduction: reduction factor of transition blocks. + Note : reduction value is inverted to compute compression dropout_rate: dropout rate weight_decay: weight decay - dilation_rate: an integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. Can be a - single integer to specify the same value for all spatial - dimensions. 
-        pooling: Data pooling to reduce resolution, one of "avg", "max", None
+        transition_dilation_rate: An integer or tuple/list of 2 integers,
+            specifying the dilation rate to use in transition blocks for
+            dilated convolution, increasing the receptive field of the
+            algorithm. Can be a single integer to specify the same value
+            for all spatial dimensions.
+        transition_pooling: Data pooling to reduce resolution in transition
+            blocks, one of "avg", "max", or None.
+        transition_kernel_size: Adjusts the filter size of the Conv2D in
+            each transition block, useful in segmentation for controlling
+            the receptive field, particularly when combined with
+            transition_dilation_rate.
         input_shape: Only used for shape inference in fully convolutional networks.
+        activation: Type of activation at the top layer. Can be one of
+            'softmax' or 'sigmoid'. Note that if sigmoid is used,
+            classes must be 1.

     Returns: keras tensor with nb_layers of conv_block appended
     '''
@@ -476,6 +564,8 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40,

     if depth is not None:
         assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4"
+    else:
+        assert nb_layers_per_block != -1, "Depth cannot be None when nb_layers_per_block is -1. Specify either parameter."

     if reduction != 0.0:
         assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0"
@@ -513,11 +603,17 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40,

     # Add dense blocks
     for block_idx in range(nb_dense_block - 1):
-        x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
-                                     dropout_rate=dropout_rate, weight_decay=weight_decay)
+        x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter,
+                                     growth_rate, bottleneck=bottleneck,
+                                     dropout_rate=dropout_rate,
+                                     weight_decay=weight_decay)
         # add transition_block
-        x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate,
-                               weight_decay=weight_decay, dilation_rate=dilation_rate, pooling=pooling)
+        x = __transition_block(x, nb_filter, compression=compression,
+                               dropout_rate=dropout_rate,
+                               weight_decay=weight_decay,
+                               dilation_rate=transition_dilation_rate,
+                               pooling=transition_pooling,
+                               kernel_size=transition_kernel_size)
         nb_filter = int(nb_filter * compression)

     # The last dense_block does not have a transition_block
@@ -527,13 +623,12 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40,
     x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay),
                            beta_regularizer=l2(weight_decay))(x)
     x = Activation('relu')(x)

-    if pooling is not None:
-        x = GlobalAveragePooling2D()(x)
-
-    if include_top and pooling is not None:
-        x = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(
+    if include_top and top == 'classification':
+        x = GlobalAveragePooling2D()(x)
+        x = Dense(nb_classes, activation=activation, kernel_regularizer=l2(
             weight_decay), bias_regularizer=l2(weight_decay))(x)
-    elif include_top and pooling is None:
+    elif include_top and top == 'segmentation':
         x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same',
                    kernel_regularizer=l2(weight_decay),
                    use_bias=False)(x)
@@ -543,7 +638,7 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40,
             row, col, channel = input_shape

         x = Reshape((row * col, nb_classes))(x)
-        x = Activation('softmax')(x)
+        x = Activation(activation)(x)
         x = Reshape((row, col, nb_classes))(x)

     return x

@@ -552,7 +647,12 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40,
def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12, reduction=0.0, dropout_rate=None, weight_decay=1E-4, nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='upsampling', - batchsize=None, init_conv_filters=48, input_shape=None): + batchsize=None, init_conv_filters=48, + transition_dilation_rate=1, + transition_pooling='avg', + transition_kernel_size=(1, 1), + activation='softmax', + input_shape=None): ''' Build the DenseNet model Args: @@ -577,6 +677,17 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, Parameter will be removed in next iteration of Keras, which infers output shape of deconvolution layers automatically. input_shape: Only used for shape inference in fully convolutional networks. + transition_dilation_rate: An integer or tuple/list of 2 integers, + specifying the dilation rate to in transition blocks for + dilated convolution, increasing the receptive field of the + algorithm. Can be a single integer to specify the same value + for all spatial dimensions. + transition_pooling: Data pooling to reduce resolution in transition + blocks, one of "avg", "max", or None. + transition_kernel_size: Adjusts the filter size of the Conv2D in + each transition block, useful in segmentation for controlling + the receptive field, particularly when combined with + transition_dilation_rate. Returns: keras tensor with nb_layers of conv_block appended ''' @@ -594,7 +705,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # check if upsampling_conv has minimum number of filters # minimum is set to 12, as at least 3 color channels are needed for # correct upsampling - assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, "Parameter `upsampling_conv` number of channels must " \ + assert nb_upsampling_conv >= 12 and nb_upsampling_conv % 4 == 0, "Parameter `upsampling_conv` number of channels must " \ "be a positive number divisible by 4 and greater " \ "than 12" @@ -661,14 +772,16 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, else: out_shape[3] = n_filters_keep - # upsampling block must upsample only the feature maps (concat_list[1:]), - # not the concatenation of the input with the feature maps - # (concat_list[0]. + # upsampling block must upsample only the + # feature maps (concat_list[1:]), + # not the concatenation of the input with the + # feature maps (concat_list[0]). 
l = merge(concat_list[1:], mode='concat', concat_axis=concat_axis) # l = concatenate(concat_list[1:], axis=concat_axis) - t = __transition_up_block( - l, nb_filters=n_filters_keep, type=upsampling_type, output_shape=out_shape) + t = __transition_up_block(l, nb_filters=n_filters_keep, + type=upsampling_type, + output_shape=out_shape) # concatenate the skip connection with the transition block x = merge([t, skip_list[block_idx]], @@ -688,7 +801,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, weight_decay=weight_decay, return_concat_list=True, grow_nb_filters=False) - if include_top: + if include_top and top is 'classification': x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay), use_bias=False)(x) @@ -698,7 +811,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, row, col, channel = input_shape x = Reshape((row * col, nb_classes))(x) - x = Activation('softmax')(x) + x = Activation(activation)(x) x = Reshape((row, col, nb_classes))(x) return x From 9c0927817afb55284971f94c44f8983c0b602e09 Mon Sep 17 00:00:00 2001 From: Andrew Hundt Date: Wed, 29 Mar 2017 15:17:32 -0400 Subject: [PATCH 17/21] densenet.py keras-2 + pep8 cleanup --- keras_contrib/applications/densenet.py | 291 +++++++++++++++---------- 1 file changed, 181 insertions(+), 110 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index f8907541e..c97b582e2 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -3,8 +3,10 @@ # Reference -- [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) -- [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) +- [Densely Connected Convolutional Networks] + (https://arxiv.org/pdf/1608.06993.pdf) +- [The One Hundred Layers Tiramisu: Fully Convolutional + DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) """ from __future__ import print_function from __future__ import absolute_import @@ -16,8 +18,9 @@ from keras.layers.core import Dense, Dropout, Activation, Reshape from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D from keras.layers.pooling import AveragePooling2D +from keras.layers.pooling import MaxPooling2D from keras.layers.pooling import GlobalAveragePooling2D -from keras.layers import Input, merge +from keras.layers import Input from keras.layers.merge import concatenate from keras.layers.normalization import BatchNormalization from keras.regularizers import l2 @@ -29,27 +32,33 @@ from keras_contrib.layers.convolutional import SubPixelUpscaling -TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5' -TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering-no-top.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering-no-top.h5' +TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download' \ + '/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5' +TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download' \ + 
                   '/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5'
+TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases' \
+                         '/download/v2.0/DenseNet-40-12-Theano-Backend-TH' \
+                         '-dim-ordering-no-top.h5'
+TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/' \
+                         'download/v2.0/DenseNet-40-12-Tensorflow-Backend-' \
+                         'TF-dim-ordering-no-top.h5'


-def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
-             nb_filter=16,
+def DenseNet(input_shape=None, depth=40, nb_dense_block=3,
+             growth_rate=12, nb_filter=16,
              nb_layers_per_block=-1, bottleneck=False, reduction=0.0,
              dropout_rate=0.0, weight_decay=1E-4,
              include_top=True, top='classification',
              weights='cifar10', input_tensor=None,
              classes=10, transition_dilation_rate=1,
-             transition_pooling="avg",
+             transition_pooling='avg',
              transition_kernel_size=(1, 1),
              activation='softmax'):
     """Instantiate the DenseNet architecture,
     optionally loading weights pre-trained
     on CIFAR-10. Note that when using TensorFlow,
     for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
+    `image_dim_ordering='tf'` in your Keras config
     at ~/.keras/keras.json.

     The model and the weights are compatible with both
@@ -73,7 +82,8 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
             E.g. `(200, 200, 3)` would be one valid value.
         depth: Number of layers in the DenseNet. May be None if
             nb_dense_block and nb_layers_per_block are set.
-        nb_dense_block: number of dense blocks to add to end (generally = 3)
+        nb_dense_block: number of dense blocks to add to end
+            (generally = 3)
         growth_rate: number of filters to add per dense block
         nb_filter: initial number of filters. -1 indicates initial
             number of filters is 2 * growth_rate
@@ -96,7 +106,7 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
             classes. 'segmentation' includes a Conv2D and a softmax
             activation. None is the same as `include_top=False`.
         weights: one of `None` (random initialization) or
-            "cifar10" (pre-training on CIFAR-10)..
+            'cifar10' (pre-training on CIFAR-10).
         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
@@ -109,7 +119,7 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
             algorithm. Can be a single integer to specify the same value
             for all spatial dimensions.
         transition_pooling: Data pooling to reduce resolution in transition
-            blocks, one of "avg", "max", or None.
+            blocks, one of 'avg', 'max', or None.
         transition_kernel_size: Adjusts the filter size of the Conv2D in
             each transition block, useful in segmentation for controlling
             the receptive field, particularly when combined with
@@ -134,16 +144,18 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
     if activation not in ['softmax', 'sigmoid']:
         raise ValueError('activation must be one of "softmax" or "sigmoid"')
     if activation == 'sigmoid' and classes != 1:
-        raise ValueError('sigmoid activation can only be used when classes = 1')
+        raise ValueError('sigmoid activation can '
+                         'only be used when classes = 1')

     # Determine proper input shape
     # If doing segmentation we still include
     # top but _obtain_input_shape only
     # supports labeling.
+    input_shape_include_top = (include_top and transition_dilation_rate == 1)
     input_shape = _obtain_input_shape(input_shape,
                                       default_size=32,
                                       min_size=8,
                                       data_format=K.image_data_format(),
-                                      include_top=(include_top and transition_dilation_rate is 1))
+                                      include_top=input_shape_include_top)

     if input_tensor is None:
         img_input = Input(shape=input_shape)
@@ -153,10 +165,13 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
     else:
         img_input = input_tensor

-    x = __create_dense_net(classes, img_input, include_top, top, depth, nb_dense_block,
-                           growth_rate, nb_filter, nb_layers_per_block, bottleneck,
-                           reduction, dropout_rate, weight_decay, transition_dilation_rate,
-                           transition_pooling, transition_kernel_size, input_shape,
+    x = __create_dense_net(classes, img_input, include_top, top, depth,
+                           nb_dense_block, growth_rate, nb_filter,
+                           nb_layers_per_block, bottleneck,
+                           reduction, dropout_rate, weight_decay,
+                           transition_dilation_rate,
+                           transition_pooling, transition_kernel_size,
+                           input_shape,
                            activation)

     # Ensure that the model takes into account
@@ -170,25 +185,30 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,

     # load weights
     if weights == 'cifar10':
-        if (depth == 40) and (nb_dense_block == 3) and (growth_rate == 12) and (nb_filter == 16) and \
-                (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4):
+        if (depth == 40) and (nb_dense_block == 3) and \
+                (growth_rate == 12) and (nb_filter == 16) and \
+                (bottleneck is False) and (reduction == 0.0) and \
+                (dropout_rate == 0.0) and (weight_decay == 1E-4):
             # Default parameters match. Weights for this model exist:

             if K.image_data_format() == 'channels_first':
                 if include_top:
-                    weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5',
+                    fname = 'densenet_40_12_th_dim_ordering_th_kernels.h5'
+                    weights_path = get_file(fname,
                                             TH_WEIGHTS_PATH,
                                             cache_subdir='models')
                 else:
-                    weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels_no_top.h5',
+                    fname = 'densenet_40_12_th_dim_ordering' \
+                            '_th_kernels_no_top.h5'
+                    weights_path = get_file(fname,
                                             TH_WEIGHTS_PATH_NO_TOP,
                                             cache_subdir='models')

                 model.load_weights(weights_path)

                 if K.backend() == 'tensorflow':
-                    warnings.warn('You are using the TensorFlow backend, yet you '
-                                  'are using the Theano '
+                    warnings.warn('You are using the TensorFlow backend, '
+                                  'yet you are using the Theano '
                                   'image dimension ordering convention '
                                   '(`image_dim_ordering="th"`). '
                                   'For best performance, set '
@@ -198,11 +218,14 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12,
                     convert_all_kernels_in_model(model)
             else:
                 if include_top:
-                    weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels.h5',
+                    weights_path = get_file('densenet_40_12_tf_dim_ordering'
+                                            '_tf_kernels.h5',
                                             TF_WEIGHTS_PATH,
                                             cache_subdir='models')
                 else:
-                    weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels_no_top.h5',
+                    weights_path = get_file('densenet_40_12_tf_dim_ordering'
+                                            '_tf_kernels_no_top.h5',
                                             TF_WEIGHTS_PATH_NO_TOP,
                                             cache_subdir='models')

@@ -223,7 +246,7 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
                 upsampling_conv=128, upsampling_type='upsampling',
                 batchsize=None,
                 transition_dilation_rate=1,
-                transition_pooling="avg",
+                transition_pooling='avg',
                 transition_kernel_size=(1, 1)):
     """Instantiate the DenseNet FCN architecture.
         Note that when using TensorFlow,
     for best performance you should set
     `image_dim_ordering="tf"` in your Keras config
     at ~/.keras/keras.json.
     # Arguments
-        nb_dense_block: number of dense blocks to add to end (generally = 5)
+
+        nb_dense_block: number of dense blocks to add to end
+            (generally = 5)
         growth_rate: number of filters to add per dense block
         nb_layers_per_block: number of layers in each dense block.
             Can be a positive integer or a list.
@@ -244,12 +269,14 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
             Note : reduction value is inverted to compute compression.
         dropout_rate: dropout rate
         weight_decay: weight decay factor
-        init_conv_filters: number of layers in the initial convolution layer
+        init_conv_filters: number of layers in the initial
+            convolution layer
         include_top: whether to include the fully-connected
             layer at the top of the network.
         weights: one of `None` (random initialization) or
             "cifar10" (pre-training on CIFAR-10)..
-        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+        input_tensor: optional Keras tensor
+            (i.e. output of `layers.Input()`)
             to use as image input for the model.
         input_shape: optional shape tuple, only to be specified
             if `include_top` is False (otherwise the input shape
@@ -264,12 +291,14 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
         activation: Type of activation at the top layer. Can be one of
             'softmax' or 'sigmoid'. Note that if sigmoid is used,
             classes must be 1.
-        upsampling_conv: number of convolutional layers in upsampling via subpixel convolution
+        upsampling_conv: number of convolutional layers in
+            upsampling via subpixel convolution
         upsampling_type: Can be one of 'upsampling', 'deconv', and
             'subpixel'. Defines type of upsampling algorithm used.
-        batchsize: Fixed batch size. This is a temporary requirement for
-            computation of output shape in the case of Deconvolution2D layers.
-            Parameter will be removed in next iteration of Keras, which infers
+        batchsize: Fixed batch size. This is a temporary
+            requirement for computation of output shape in the
+            case of Deconvolution2D layers. Parameter will be removed
+            in next iteration of Keras, which infers
             output shape of deconvolution layers automatically.
         transition_dilation_rate: An integer or tuple/list of 2 integers,
             specifying the dilation rate to use in transition blocks for
            dilated convolution, increasing the receptive field of the
            algorithm. Can be a single integer to specify the same value
            for all spatial dimensions.
         transition_pooling: Data pooling to reduce resolution in transition
            blocks, one of "avg", "max", or None.
         transition_kernel_size: Adjusts the filter size of the Conv2D in
            each transition block, useful in segmentation for controlling
            the receptive field, particularly when combined with
            transition_dilation_rate.

     # Returns
         A Keras model instance.
@@ -299,22 +328,25 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
                          '"upsampling", "deconv", or "subpixel".')

     if upsampling_type == 'deconv' and batchsize is None:
-        raise ValueError('If "upsampling_type" is deconvoloution, then a fixed '
-                         'batch size must be provided in batchsize parameter.')
+        raise ValueError('If "upsampling_type" is deconvolution, then a '
+                         'fixed batch size must be provided in '
+                         'batchsize parameter.')

     if input_shape is None:
         raise ValueError(
             'For fully convolutional models, input shape must be supplied.')

     if type(nb_layers_per_block) is not list and nb_dense_block < 1:
-        raise ValueError('Number of dense layers per block must be greater than 1. Argument '
-                         'value was %d.' % (nb_layers_per_block))
+        raise ValueError('Number of dense layers per block must be greater '
+                         'than 1. Argument value was %d.' 
% + (nb_layers_per_block)) if activation not in ['softmax', 'sigmoid']: raise ValueError('activation must be one of "softmax" or "sigmoid"') if activation == 'sigmoid' and classes != 1: - raise ValueError('sigmoid activation can only be used when classes = 1') + raise ValueError( + 'sigmoid activation can only be used when classes = 1') # Determine proper input shape # If doing segmentation we still include top @@ -335,9 +367,11 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, img_input = input_tensor x = __create_fcn_dense_net(classes, img_input, include_top, nb_dense_block, - growth_rate, reduction, dropout_rate, weight_decay, - nb_layers_per_block, upsampling_conv, upsampling_type, - batchsize, init_conv_filters, input_shape, transition_dilation_rate, + growth_rate, reduction, dropout_rate, + weight_decay, nb_layers_per_block, + upsampling_conv, upsampling_type, + batchsize, init_conv_filters, + transition_dilation_rate, transition_pooling, transition_kernel_size, activation, input_shape) @@ -353,7 +387,8 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, return model -def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1E-4): +def __conv_block(ip, nb_filter, bottleneck=False, + dropout_rate=None, weight_decay=1E-4): ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout Args: @@ -363,12 +398,14 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca dropout_rate: dropout rate weight_decay: weight decay factor - Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) + Returns: keras tensor with batch_norm, relu and convolution2d added + (optional bottleneck) ''' - concat_axis = 1 if K.image_data_format() == "channels_first" else -1 + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, + gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) @@ -389,8 +426,8 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) - x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", - padding="same", use_bias=False, + x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', + padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) if dropout_rate: x = Dropout(dropout_rate)(x) @@ -399,15 +436,15 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, - weight_decay=1E-4, dilation_rate=1, pooling="avg", + weight_decay=1E-4, dilation_rate=1, pooling='avg', kernel_size=(1, 1)): - ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D + ''' Apply BatchNorm, Relu 1x1, Conv2D, compression, dropout and Maxpooling2D Args: ip: keras tensor nb_filter: number of filters compression: calculated as 1 - reduction. Reduces the number of - feature maps in the transition block. + feature maps in the transition block, is optional. 
dropout_rate: dropout rate weight_decay: weight decay factor dilation_rate: an integer or tuple/list of 2 integers, specifying the @@ -429,23 +466,24 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, beta_regularizer=l2(weight_decay))(ip) x = Activation('relu')(x) x = Conv2D(int(nb_filter * compression), kernel_size, - kernel_initializer="he_uniform", padding="same", use_bias=False, + kernel_initializer='he_uniform', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay), dilation_rate=dilation_rate)(x) if dropout_rate: x = Dropout(dropout_rate)(x) - if pooling == "avg": + if pooling == 'avg': x = AveragePooling2D((2, 2), strides=(2, 2))(x) - elif pooling == "max": + elif pooling == 'max': x = MaxPooling2D((2, 2), strides=(2, 2))(x) return x -def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1E-4, +def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, + dropout_rate=None, weight_decay=1E-4, grow_nb_filters=True, return_concat_list=False): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones + ''' Build a dense_block where each conv_block is fed to subsequent ones Args: x: keras tensor @@ -456,7 +494,8 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou dropout_rate: dropout rate weight_decay: weight decay factor grow_nb_filters: flag to decide to allow number of filters to grow - return_concat_list: return the list of feature maps along with the actual output + return_concat_list: return the list of feature maps along with the + actual output Returns: keras tensor with nb_layers of conv_block appended ''' @@ -470,8 +509,7 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou dropout_rate, weight_decay) x_list.append(x) - x = merge(x_list, mode='concat', concat_axis=concat_axis) - # x = concatenate(x_list, concat_axis) + x = concatenate(x_list, concat_axis) if grow_nb_filters: nb_filter += growth_rate @@ -482,13 +520,15 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou return x, nb_filter -def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, weight_decay=1E-4): +def __transition_up_block(ip, nb_filters, type='upsampling', + output_shape=None, weight_decay=1E-4): ''' SubpixelConvolutional Upscaling (factor = 2) Args: ip: keras tensor nb_filters: number of layers - type: can be 'upsampling', 'subpixel', or 'deconv'. Determines type of upsampling performed + type: can be 'upsampling', 'subpixel', or 'deconv'. Determines type of + upsampling performed output_shape: required if type = 'deconv'. 
            Output shape of tensor
        weight_decay: weight decay factor

    Returns: a keras tensor
    '''

    if type == 'upsampling':
        x = UpSampling2D()(ip)
    elif type == 'subpixel':
-        x = Conv2D(nb_filters, (3, 3), activation="relu", padding='same', kernel_regularizer=l2(weight_decay),
+        x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
+                   kernel_regularizer=l2(weight_decay),
                   use_bias=False, kernel_initializer='he_uniform')(ip)
        x = SubPixelUpscaling(scale_factor=2)(x)
-        x = Conv2D(nb_filters, (3, 3), activation="relu", padding='same', kernel_regularizer=l2(weight_decay),
+        x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
+                   kernel_regularizer=l2(weight_decay),
                   use_bias=False, kernel_initializer='he_uniform')(x)
    else:
-        x = Conv2DTranspose(nb_filters, (3, 3), output_shape, activation='relu', padding='same',
-                            subsample=(2, 2), kernel_initializer='he_uniform')(ip)
+        # Keras 2 infers the transposed-convolution output shape, and
+        # `subsample` was renamed `strides`.
+        x = Conv2DTranspose(nb_filters, (3, 3),
+                            activation='relu', padding='same',
+                            strides=(2, 2),
+                            kernel_initializer='he_uniform')(ip)

    return x

@@ -518,7 +562,7 @@ def __create_dense_net(nb_classes, img_input, include_top=True,
                       transition_dilation_rate=1, transition_pooling="avg",
                       transition_kernel_size=(1, 1), input_shape=None,
                       activation='softmax'):
-    ''' Build the DenseNet model
+    """Build the DenseNet model

    Args:
        nb_classes: number of classes
@@ -552,29 +596,34 @@ def __create_dense_net(nb_classes, img_input, include_top=True,
            each transition block, useful in segmentation for controlling
            the receptive field, particularly when combined with
            transition_dilation_rate.
-        input_shape: Only used for shape inference in fully convolutional networks.
+        input_shape: Only used for shape inference in fully
+            convolutional networks.
        activation: Type of activation at the top layer. Can be one of
            'softmax' or 'sigmoid'. Note that if sigmoid is used,
            classes must be 1.

    Returns: keras tensor with nb_layers of conv_block appended
-    '''
+    """

    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if depth is not None:
-        assert (depth - 4) % 3 == 0, "Depth must be nb_dense_block * N + 4"
+        assert (depth - 4) % 3 == 0, 'Depth must be nb_dense_block * N + 4'
    else:
-        assert nb_layers_per_block != -1, "Depth cannot be None when nb_layers_per_block is -1. Specify either parameter."
+        assert nb_layers_per_block != -1, \
+            'Depth cannot be None when nb_layers_per_block is -1.' \
+            ' Specify either parameter.'

    if reduction != 0.0:
-        assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0"
+        assert reduction <= 1.0 and reduction > 0.0, \
+            "reduction value must lie between 0.0 and 1.0"

    # layers in each dense block
    if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
        nb_layers = list(nb_layers_per_block)  # Convert tuple to list

-        assert len(nb_layers) == (nb_dense_block + 1), "If list, nb_layer is used as provided. " \
-                                                       "Note that list size must be (nb_dense_block + 1)"
+        assert len(nb_layers) == (nb_dense_block + 1), \
+            "If list, nb_layer is used as provided. " \
+            "Note that list size must be (nb_dense_block + 1)"
" \ + "Note that list size must be (nb_dense_block + 1)" final_nb_layer = nb_layers[-1] nb_layers = nb_layers[:-1] else: @@ -597,8 +646,8 @@ def __create_dense_net(nb_classes, img_input, include_top=True, compression = 1.0 - reduction # Initial convolution - x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", - padding="same", name="initial_conv2D", use_bias=False, + x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', + padding='same', name='initial_conv2D', use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) # Add dense blocks @@ -617,10 +666,13 @@ def __create_dense_net(nb_classes, img_input, include_top=True, nb_filter = int(nb_filter * compression) # The last dense_block does not have a transition_block - x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, - dropout_rate=dropout_rate, weight_decay=weight_decay) + x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, + growth_rate, bottleneck=bottleneck, + dropout_rate=dropout_rate, + weight_decay=weight_decay) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), + x = BatchNormalization(axis=concat_axis, + gamma_regularizer=l2(weight_decay), beta_regularizer=l2(weight_decay))(x) x = Activation('relu')(x) @@ -629,7 +681,8 @@ def __create_dense_net(nb_classes, img_input, include_top=True, x = Dense(nb_classes, activation=activation, kernel_regularizer=l2( weight_decay), bias_regularizer=l2(weight_decay))(x) elif include_top and top is 'segmentation': - x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay), + x = Conv2D(nb_classes, (1, 1), activation='linear', + padding='same', kernel_regularizer=l2(weight_decay), use_bias=False)(x) if K.image_data_format() == 'channels_first': @@ -644,9 +697,12 @@ def __create_dense_net(nb_classes, img_input, include_top=True, return x -def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12, +def __create_fcn_dense_net(nb_classes, img_input, include_top, + top='segmentation', + nb_dense_block=5, growth_rate=12, reduction=0.0, dropout_rate=None, weight_decay=1E-4, - nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='upsampling', + nb_layers_per_block=4, nb_upsampling_conv=128, + upsampling_type='upsampling', batchsize=None, init_conv_filters=48, transition_dilation_rate=1, transition_pooling='avg', @@ -657,11 +713,13 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, Args: nb_classes: number of classes - img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) + img_input: tuple of shape (channels, rows, columns) or + (rows, columns, channels) include_top: flag to include the final Dense layer nb_dense_block: number of dense blocks to add to end (generally = 3) growth_rate: number of filters to add per dense block - reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression + reduction: reduction factor of transition blocks. + Note: reduction value is inverted to compute compression dropout_rate: dropout rate weight_decay: weight decay nb_layers_per_block: number of layers in each dense block. @@ -669,14 +727,16 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, If positive integer, a set number of layers per dense block. If list, nb_layer is used as provided. 
            Note that list size must be (nb_dense_block + 1)
-        nb_upsampling_conv: number of convolutional layers in upsampling via subpixel convolution
+        nb_upsampling_conv: number of convolutional layers in
+            upsampling via subpixel convolution
        upsampling_type: Can be one of 'upsampling', 'deconv', and
            'subpixel'. Defines type of upsampling algorithm used.
        batchsize: Fixed batch size. This is a temporary requirement for
            computation of output shape in the case of Deconvolution2D layers.
            Parameter will be removed in next iteration of Keras, which infers
            output shape of deconvolution layers automatically.
-        input_shape: Only used for shape inference in fully convolutional networks.
+        input_shape: Only used for shape inference in fully
+            convolutional networks.
        transition_dilation_rate: An integer or tuple/list of 2 integers,
            specifying the dilation rate to use in transition blocks for
            dilated convolution, increasing the receptive field of the
            algorithm. Can be a single integer to specify the same value
            for all spatial dimensions.
        transition_pooling: Data pooling to reduce resolution in transition
            blocks, one of "avg", "max", or None.
        transition_kernel_size: Adjusts the filter size of the Conv2D in
            each transition block, useful in segmentation for controlling
            the receptive field, particularly when combined with
            transition_dilation_rate.

    Returns: keras tensor with nb_layers of conv_block appended
    '''
-    concat_axis = 1 if K.image_data_format() == "channels_first" else -1
+    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if concat_axis == 1:  # th dim ordering
        _, rows, cols = input_shape
    else:
        rows, cols, _ = input_shape

    if reduction != 0.0:
-        assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0"
+        assert reduction <= 1.0 and reduction > 0.0, \
+            'reduction value must lie between 0.0 and 1.0'

    # check if upsampling_conv has minimum number of filters
    # minimum is set to 12, as at least 3 color channels are needed for
    # correct upsampling
-    assert nb_upsampling_conv >= 12 and nb_upsampling_conv % 4 == 0, "Parameter `upsampling_conv` number of channels must " \
-                                                                     "be a positive number divisible by 4 and greater " \
-                                                                     "than 12"
+    assert nb_upsampling_conv >= 12 and nb_upsampling_conv % 4 == 0, \
+        'Parameter `upsampling_conv` number of channels must ' \
+        'be a positive number divisible by 4 and at least 12'

    # layers in each dense block
    if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
        nb_layers = list(nb_layers_per_block)  # Convert tuple to list

        assert len(nb_layers) == (nb_dense_block + 1), \
            'If list, nb_layer is used as provided. 
' \ + 'Note that list size must be (nb_dense_block + 1)' bottleneck_nb_layers = nb_layers[-1] rev_layers = nb_layers[::-1] @@ -727,7 +790,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, compression = 1.0 - reduction # Initial convolution - x = Conv2D(init_conv_filters, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False, + x = Conv2D(init_conv_filters, (3, 3), kernel_initializer="he_uniform", + padding="same", name="initial_conv2D", use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) nb_filter = init_conv_filters @@ -736,14 +800,17 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # Add dense blocks and transition down block for block_idx in range(nb_dense_block): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay) + x, nb_filter = __dense_block(x, nb_layers[block_idx], + nb_filter, growth_rate, + dropout_rate=dropout_rate, + weight_decay=weight_decay) # Skip connection skip_list.append(x) # add transition_block - x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, + x = __transition_block(x, nb_filter, compression=compression, + dropout_rate=dropout_rate, weight_decay=weight_decay) # this is calculated inside transition_down_block @@ -752,8 +819,11 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # The last dense_block does not have a transition_down_block # return the concatenated feature maps without the concatenation of the # input - _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay, + _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, + nb_filter, + growth_rate, + dropout_rate=dropout_rate, + weight_decay=weight_decay, return_concat_list=True) skip_list = skip_list[::-1] # reverse the skip list @@ -776,17 +846,14 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # feature maps (concat_list[1:]), # not the concatenation of the input with the # feature maps (concat_list[0]). 
-            l = merge(concat_list[1:], mode='concat', concat_axis=concat_axis)
-            # l = concatenate(concat_list[1:], axis=concat_axis)
+            l = concatenate(concat_list[1:], axis=concat_axis)

            t = __transition_up_block(l, nb_filters=n_filters_keep,
                                      type=upsampling_type,
                                      output_shape=out_shape)

            # concatenate the skip connection with the transition block
-            x = merge([t, skip_list[block_idx]],
-                      mode='concat', concat_axis=concat_axis)
-            # x = concatenate([t, skip_list[block_idx]], axis=concat_axis)
+            x = concatenate([t, skip_list[block_idx]], axis=concat_axis)

            if K.image_data_format() == 'channels_first':
                out_shape[2] *= 2
@@ -796,13 +863,17 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top,
                out_shape[2] *= 2

            # Dont allow the feature map size to grow in upsampling dense blocks
-            _, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1], nb_filter=growth_rate,
-                                                      growth_rate=growth_rate, dropout_rate=dropout_rate,
-                                                      weight_decay=weight_decay,
-                                                      return_concat_list=True, grow_nb_filters=False)
+            _, nb_filter, concat_list = \
+                __dense_block(x,
+                              nb_layers[nb_dense_block + block_idx + 1],
+                              nb_filter=growth_rate, growth_rate=growth_rate,
+                              dropout_rate=dropout_rate,
+                              weight_decay=weight_decay,
+                              return_concat_list=True, grow_nb_filters=False)

    if include_top and top == 'classification':
-        x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay),
+        x = Conv2D(nb_classes, (1, 1), activation='linear',
+                   padding='same', kernel_regularizer=l2(weight_decay),
                   use_bias=False)(x)

        if K.image_data_format() == 'channels_first':

From 47659afc076ebf4ffe37a9be355338dc8bd00089 Mon Sep 17 00:00:00 2001
From: Andrew Hundt
Date: Thu, 30 Mar 2017 16:11:47 -0400
Subject: [PATCH 18/21] pep8 fix

---
 keras_contrib/applications/densenet.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
index c97b582e2..afacd6545 100644
--- a/keras_contrib/applications/densenet.py
+++ b/keras_contrib/applications/densenet.py
@@ -32,16 +32,17 @@
 from keras_contrib.layers.convolutional import SubPixelUpscaling

-TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download' \
-                  '/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5'
-TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download' \
-                  '/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5'
-TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases' \
-                         '/download/v2.0/DenseNet-40-12-Theano-Backend-TH' \
-                         '-dim-ordering-no-top.h5'
-TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/' \
-                         'download/v2.0/DenseNet-40-12-Tensorflow-Backend-' \
-                         'TF-dim-ordering-no-top.h5'
+TH_WEIGHTS_PATH = ('https://github.com/titu1994/DenseNet/releases/download'
+                   '/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5')
+TF_WEIGHTS_PATH = ('https://github.com/titu1994/DenseNet/releases/download'
+                   '/v2.0/DenseNet-40-12-Tensorflow'
+                   '-Backend-TF-dim-ordering.h5')
+TH_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/DenseNet/releases'
+                          '/download/v2.0/DenseNet-40-12-Theano-Backend-TH'
+                          '-dim-ordering-no-top.h5')
+TF_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/DenseNet/releases/'
+                          'download/v2.0/DenseNet-40-12-Tensorflow-Backend-'
+                          'TF-dim-ordering-no-top.h5')

From 64630aed0386f33db592ad3ccc9265967fc87d65 Mon Sep 17 00:00:00 2001
From: Andrew Hundt
Date: Thu, 30 Mar 2017 21:07:55 -0400
Subject: [PATCH 19/21] densenet.py add missing param in DenseNetFCN

---
 keras_contrib/applications/densenet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
index afacd6545..fa0a32827 100644
--- a/keras_contrib/applications/densenet.py
+++ b/keras_contrib/applications/densenet.py
@@ -367,7 +367,8 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
     else:
         img_input = input_tensor

-    x = __create_fcn_dense_net(classes, img_input, include_top, nb_dense_block,
+    x = __create_fcn_dense_net(classes, img_input, include_top, top,
+                               nb_dense_block,
                                growth_rate, reduction, dropout_rate,
                                weight_decay, nb_layers_per_block,
                                upsampling_conv, upsampling_type,

From f9cf75a58d5f2936c572d554075e20174a7fb9f7 Mon Sep 17 00:00:00 2001
From: Andrew Hundt
Date: Fri, 31 Mar 2017 20:52:16 -0400
Subject: [PATCH 20/21] densenet.py BatchNormalization Keras-2 update

---
 keras_contrib/applications/densenet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
index fa0a32827..733f1d0ce 100644
--- a/keras_contrib/applications/densenet.py
+++ b/keras_contrib/applications/densenet.py
@@ -423,7 +423,7 @@ def __conv_block(ip, nb_filter, bottleneck=False,
         if dropout_rate:
             x = Dropout(dropout_rate)(x)

-    x = BatchNormalization(mode=0, axis=concat_axis,
+    x = BatchNormalization(axis=concat_axis,
                            gamma_regularizer=l2(weight_decay),
                            beta_regularizer=l2(weight_decay))(x)
     x = Activation('relu')(x)

From 2a2c1768cc4b6f314533a8e5d2583cf7a5385e3a Mon Sep 17 00:00:00 2001
From: Andrew Hundt
Date: Mon, 3 Apr 2017 23:23:16 -0400
Subject: [PATCH 21/21] DenseNetFCN corrected include_top case

---
 keras_contrib/applications/densenet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
index 733f1d0ce..50431491d 100644
--- a/keras_contrib/applications/densenet.py
+++ b/keras_contrib/applications/densenet.py
@@ -314,6 +314,7 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16,
             transition_dilation_rate.

     # Returns
+
         A Keras model instance.
     """

@@ -873,7 +874,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top,
                            weight_decay=weight_decay,
                            return_concat_list=True, grow_nb_filters=False)

-    if include_top and top == 'classification':
+    if include_top and top == 'segmentation':
         x = Conv2D(nb_classes, (1, 1), activation='linear',
                    padding='same', kernel_regularizer=l2(weight_decay),
                    use_bias=False)(x)
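
Taken together, the patches above expose dilated, optionally pooling-free
transition blocks and a 'segmentation' top through a handful of new keyword
arguments. The snippet below is a minimal usage sketch rather than part of
the patch series: it assumes the patched keras_contrib module is importable,
and the input shape and class counts are arbitrary illustrative values.
weights=None is used because the released CIFAR-10 weight files only cover
the default 40-12 classification configuration.

    from keras_contrib.applications.densenet import DenseNet, DenseNetFCN

    # Dilated DenseNet for dense prediction: transition_pooling=None keeps
    # the full spatial resolution, transition_dilation_rate grows the
    # receptive field instead of pooling, and top='segmentation' ends the
    # network with a 1x1 Conv2D plus a per-pixel softmax over 21 classes.
    model = DenseNet(input_shape=(224, 224, 3), depth=40, nb_dense_block=3,
                     growth_rate=12, nb_filter=16, weights=None,
                     include_top=True, top='segmentation', classes=21,
                     transition_dilation_rate=2, transition_pooling=None,
                     transition_kernel_size=(3, 3), activation='softmax')

    # Fully convolutional DenseNet (Tiramisu-style) with the default
    # transition settings; sigmoid requires classes == 1, and the 224x224
    # input divides evenly by 2**nb_dense_block so the skip connections
    # line up during upsampling.
    fcn = DenseNetFCN(input_shape=(224, 224, 3), nb_dense_block=5,
                      growth_rate=16, nb_layers_per_block=4, classes=1,
                      activation='sigmoid', upsampling_type='upsampling')

With transition_pooling=None and a dilation rate above 1, the transition
blocks never reduce the feature-map size, which is why the 'segmentation'
top can reshape its output back to the input height and width.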