kubeflow · k8s-ci-robot · Feb 27, 2019 · Feb 19, 2019 · Feb 19, 2019 · Feb 20, 2019
diff --git a/examples/NAS-training-containers/cifar10/ModelConstructor.py b/examples/NAS-training-containers/cifar10/ModelConstructor.py
@@ -4,7 +4,7 @@
 import json
 from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
     AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, Dropout
-from op_library import concat, conv, reduction
+from op_library import concat, conv, sp_conv, dw_conv, reduction
 
 
 class ModelConstructor(object):
@@ -45,6 +45,10 @@ def build_model(self):
             layer_input = concat(input_layers)
             if opt_config['opt_type'] == 'convolution':
                 layer_output = conv(layer_input, opt_config)
+            if opt_config['opt_type'] == 'separable_convolution':
+                layer_output = sp_conv(layer_input, opt_config)
+            if opt_config['opt_type'] == 'depthwise_convolution':
+                layer_output = dw_conv(layer_input, opt_config)
             elif opt_config['opt_type'] == 'reduction':
                 layer_output = reduction(layer_input, opt_config)
 

diff --git a/examples/NAS-training-containers/cifar10/RunTrial.py b/examples/NAS-training-containers/cifar10/RunTrial.py
@@ -3,6 +3,8 @@
 from keras.datasets import cifar10
 from ModelConstructor import ModelConstructor
 from keras.utils import to_categorical
+from keras.utils import multi_gpu_model
+from keras.preprocessing.image import ImageDataGenerator
 import argparse
 import time
 
@@ -14,6 +16,8 @@
                         help='configurations and search space embeddings')
     parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
                         help='number of epoches that each child will be trained')
+    parser.add_argument('--num_gpus', type=int, default=1, metavar='N',
+                        help='number of epoches that each child will be trained')
     args = parser.parse_args()
 
     arch = args.architecture.replace("\'", "\"")
@@ -28,11 +32,18 @@
     print(">>> num_epochs received by trial")
     print(num_epochs)
 
-    print(">>> Constructing Model...")
+    num_gpus = args.num_gpus
+    print(">>> num_gpus received by trial:")
+    print(num_gpus)
+
+    print("\n>>> Constructing Model...")
     constructor = ModelConstructor(arch, nn_config)
     test_model = constructor.build_model()
-    print(">>> Model Constructed Successfully")
+    print(">>> Model Constructed Successfully\n")
 
+    if num_gpus > 1:
+        test_model = multi_gpu_model(test_model, gpus=num_gpus)
+
     test_model.summary()
     test_model.compile(loss=keras.losses.categorical_crossentropy,
                        optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
@@ -46,13 +57,20 @@
     y_train = to_categorical(y_train)
     y_test = to_categorical(y_test)
 
-    print(">>> Data Loaded. Training start.")
+    augmentation = ImageDataGenerator(
+        width_shift_range=0.1,
+        height_shift_range=0.1,
+        horizontal_flip=True)
+
+    aug_data_flow = augmentation.flow(x_train, y_train, batch_size=128)
+
+    print(">>> Data Loaded. Training starts.")
     for e in range(num_epochs):
         print("\nTotal Epoch {}/{}".format(e+1, num_epochs))
-        history = test_model.fit(x=x_train, y=y_train,
-                                 shuffle=True, batch_size=128,
-                                 epochs=1, verbose=1,
-                                 validation_data=(x_test, y_test))
+        history = test_model.fit_generator(generator=aug_data_flow,
+                                           steps_per_epoch=int(len(x_train)/128)+1,
+                                           epochs=1, verbose=1,
+                                           validation_data=(x_test, y_test))
         print("Training-Accuracy={}".format(history.history['acc'][-1]))
         print("Training-Loss={}".format(history.history['loss'][-1]))
         print("Validation-Accuracy={}".format(history.history['val_acc'][-1]))

diff --git a/examples/NAS-training-containers/cifar10/op_library.py b/examples/NAS-training-containers/cifar10/op_library.py
@@ -1,7 +1,8 @@
 import numpy as np
 from keras import backend as K
 from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
-    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D
+    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, \
+    SeparableConv2D, DepthwiseConv2D
 
 
 def concat(inputs):
@@ -56,12 +57,69 @@ def conv(x, config):
     return result
 
 
+def sp_conv(x, config):
+    parameters = {
+        "num_filter":       64,
+        "filter_size":      3,
+        "stride":           1,
+        "depth_multiplier": 1,
+    }
+
+    for k in parameters.keys():
+        if k in config:
+            parameters[k] = int(config[k])
+
+    activated = Activation('relu')(x)
+
+    conved = SeparableConv2D(
+        filters=parameters['num_filter'],
+        kernel_size=parameters['filter_size'],
+        strides=parameters['stride'],
+        depth_multiplier=parameters['depth_multiplier'],
+        padding='same')(activated)
+
+    result = BatchNormalization()(conved)
+
+    return result
+
+def dw_conv(x, config):
+    parameters = {
+        "filter_size":      3,
+        "stride":           1,
+        "depth_multiplier": 1,
+    }
+    for k in parameters.keys():
+        if k in config:
+            parameters[k] = int(config[k])
+
+    activated = Activation('relu')(x)
+
+    conved = DepthwiseConv2D(
+        kernel_size=parameters['filter_size'],
+        strides=parameters['stride'],
+        depth_multiplier=parameters['depth_multiplier'],
+        padding='same')(activated)
+
+    result = BatchNormalization()(conved)
+
+    return result
+
+
 def reduction(x, config):
+    # Handle the extreme case where the input has dimension 1 by 1 and is not reducible:
+    # we just replace the reduction layer with an identity layer.
+    # Such a situation is very likely to appear, though.
+    dim = K.int_shape(x)
+    if dim[1] == 1 or dim[2] == 1:
+        print("WARNING: One or more dimensions of the input of the reduction layer is 1. It cannot be further reduced. A identity layer will be used instead.")
+        return x
+
     parameters = {
         'reduction_type':   "max_pooling",
         'pool_size':        2,
         'stride':           None,
     }
+
     if 'reduction_type' in config:
         parameters['reduction_type'] = config['reduction_type']
     if 'pool_size' in config:

diff --git a/examples/nasjob-example-RL.yaml b/examples/nasjob-example-RL.yaml
@@ -1,3 +1,10 @@
+# This example aims to show all the possible operations.
+# It is not very likely to get good results due to the extensive search space.
+
+# In practice, setting up a limited search space with more common operations is more likely to give better performance.
+# For example, Efficient Neural Architecture Search via Parameter Sharing (https://arxiv.org/abs/1802.03268)
+# uses only 6 operations: 3x3/5x5 convolution, 3x3/5x5 separable_convolution and 3x3 max_pooling/avg_pooling.
+
 apiVersion: "kubeflow.org/v1alpha1"
 kind: StudyJob
 metadata:
@@ -48,6 +55,57 @@ spec:
               list:
               - "1"
               - "2"
+      - operationType: separable_convolution
+        parameterconfigs:
+          - name: filter_size
+            parametertype: categorical
+            feasible:
+              list:
+              - "3"
+              - "5"
+              - "7"
+          - name: num_filter
+            parametertype: categorical
+            feasible:
+              list:
+              - "32"
+              - "48"
+              - "64"
+              - "96"
+              - "128"
+          - name: stride
+            parametertype: categorical
+            feasible:
+              list:
+              - "1"
+              - "2"
+          - name: depth_multiplier
+            parametertype: categorical
+            feasible:
+              list:
+              - "1"
+              - "2"
+      - operationType: depthwise_convolution
+        parameterconfigs:
+          - name: filter_size
+            parametertype: categorical
+            feasible:
+              list:
+              - "3"
+              - "5"
+              - "7"
+          - name: stride
+            parametertype: categorical
+            feasible:
+              list:
+              - "1"
+              - "2"
+          - name: depth_multiplier
+            parametertype: categorical
+            feasible:
+              list:
+              - "1"
+              - "2"   
       - operationType: reduction
         parameterconfigs:
           - name: reduction_type