From 3fcc5e9ec95ec0ad001deb3584558d040ddb1df3 Mon Sep 17 00:00:00 2001 From: chicm-ms <38930155+chicm-ms@users.noreply.github.com> Date: Thu, 17 Jan 2019 11:36:19 +0800 Subject: [PATCH] Integration test refactoring (#21) (#616) * Integration test refactoring (#21) * Refactoring integration tests * test metrics * update azure pipeline * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * updates * update trigger --- azure-pipelines.yml | 39 ++++-- examples/trials/cifar10_pytorch/main.py | 8 +- examples/trials/mnist-annotation/mnist.py | 39 +++--- examples/trials/mnist/mnist.py | 47 ++++--- test/config_test.py | 127 ++++++++++++++++++ .../examples/cifar10-pytorch.test.yml | 25 ++++ .../examples/cifar10_search_space.json | 5 + .../examples/mnist-annotation.test.yml | 24 ++++ .../config_test/examples/mnist-keras.test.yml | 25 ++++ test/config_test/examples/mnist.test.yml | 25 ++++ .../examples/mnist_search_space.json | 7 + .../examples/sklearn-classification.test.yml | 25 ++++ .../examples/sklearn-regression.test.yml | 25 ++++ test/config_test/multi_phase/multi_phase.py | 8 ++ .../multi_phase/multi_phase.test.yml | 22 +++ .../config_test/multi_phase/search_space.json | 7 + .../multi_thread/multi_thread.test.yml | 22 +++ .../multi_thread/multi_thread_trial.py | 7 + .../multi_thread/multi_thread_tuner.py | 22 +++ .../multi_thread/search_space.json | 7 + test/metrics_test.py | 98 ++++++++++++++ test/metrics_test/expected_metrics.json | 4 + test/metrics_test/metrics.test.yml | 22 +++ test/metrics_test/search_space.json | 7 + test/metrics_test/trial.py | 10 ++ test/naive_test.py | 4 +- test/nnictl | 2 - test/nnimanager | 2 - test/training_service.yml | 24 ++++ test/{sdk_test.py => tuner_test.py} | 8 +- test/{sdk_test => tuner_test}/local.yml | 0 test/{sdk_test => tuner_test}/naive_trial.py | 0 .../search_space.json | 0 test/utils.py | 30 ++++- 34 files changed, 658 insertions(+), 69 deletions(-) create mode 100644 test/config_test.py create mode 100644 test/config_test/examples/cifar10-pytorch.test.yml create mode 100644 test/config_test/examples/cifar10_search_space.json create mode 100644 test/config_test/examples/mnist-annotation.test.yml create mode 100644 test/config_test/examples/mnist-keras.test.yml create mode 100644 test/config_test/examples/mnist.test.yml create mode 100644 test/config_test/examples/mnist_search_space.json create mode 100644 test/config_test/examples/sklearn-classification.test.yml create mode 100644 test/config_test/examples/sklearn-regression.test.yml create mode 100644 test/config_test/multi_phase/multi_phase.py create mode 100644 test/config_test/multi_phase/multi_phase.test.yml create mode 100644 test/config_test/multi_phase/search_space.json create mode 100644 test/config_test/multi_thread/multi_thread.test.yml create mode 100644 test/config_test/multi_thread/multi_thread_trial.py create mode 100644 test/config_test/multi_thread/multi_thread_tuner.py create mode 100644 test/config_test/multi_thread/search_space.json create mode 100644 test/metrics_test.py create mode 100644 test/metrics_test/expected_metrics.json create mode 100644 test/metrics_test/metrics.test.yml create mode 100644 test/metrics_test/search_space.json create mode 100644 test/metrics_test/trial.py delete mode 100644 test/nnictl delete mode 100644 test/nnimanager create mode 100644 test/training_service.yml rename test/{sdk_test.py => tuner_test.py} (93%) rename 
test/{sdk_test => tuner_test}/local.yml (100%) rename test/{sdk_test => tuner_test}/naive_trial.py (100%) rename test/{sdk_test => tuner_test}/search_space.json (100%) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 95546c8ef9..90df728c8a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,16 +1,12 @@ trigger: - master - dev-remote-ci +- dev-it jobs: - job: 'Ubuntu_16_04' - pool: - vmImage: 'Ubuntu 16.04' - strategy: - matrix: - Python36: - PYTHON_VERSION: '3.6' + pool: 'NNI CI GPU' steps: - script: python3 -m pip install --upgrade pip setuptools @@ -18,6 +14,16 @@ jobs: - script: | source install.sh displayName: 'Install nni toolkit via source code' + - script: | + python3 -m pip install sklearn --user + python3 -m pip install torchvision --user + python3 -m pip install keras --user + python3 -m pip install tensorflow==1.9.0 --user + displayName: 'Install dependencies for integration tests' + - script: | + cd test + PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local --config config_test/examples/mnist.test.yml + displayName: 'Examples on local machine tests' - script: | cd test source unittest.sh @@ -25,11 +31,11 @@ jobs: - script: | cd test PATH=$HOME/.local/bin:$PATH python3 naive_test.py - displayName: 'Integration tests' + displayName: 'Naive test' - script: | cd test - PATH=$HOME/.local/bin:$PATH python3 sdk_test.py - displayName: 'Built-in dispatcher tests' + PATH=$HOME/.local/bin:$PATH python3 tuner_test.py + displayName: 'Built-in tuners / assessors tests' - job: 'macOS_10_13' pool: @@ -45,6 +51,11 @@ jobs: - script: | source install.sh displayName: 'Install nni toolkit via source code' + - script: | + python3 -m pip install sklearn --user + python3 -m pip install torchvision --user + python3 -m pip install keras --user + displayName: 'Install dependencies for integration tests' - script: | cd test PATH=$HOME/Library/Python/3.7/bin:$PATH && source unittest.sh @@ -52,8 +63,12 @@ jobs: - script: | cd test PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py - displayName: 'Integration tests' + displayName: 'Naive test' + - script: | + cd test + PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py + displayName: 'Built-in tuners / assessors tests' - script: | cd test - PATH=$HOME/Library/Python/3.7/bin:$PATH python3 sdk_test.py - displayName: 'Built-in dispatcher tests' \ No newline at end of file + PATH=$HOME/Library/Python/3.7/bin:$PATH python3 config_test.py --ts local + displayName: 'Examples on local machine tests' diff --git a/examples/trials/cifar10_pytorch/main.py b/examples/trials/cifar10_pytorch/main.py index 42e836fb8e..ebde38d37b 100644 --- a/examples/trials/cifar10_pytorch/main.py +++ b/examples/trials/cifar10_pytorch/main.py @@ -1,6 +1,6 @@ '''Train CIFAR10 with PyTorch.''' from __future__ import print_function - +import argparse import torch import torch.nn as nn import torch.optim as optim @@ -174,6 +174,10 @@ def test(epoch): if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--epochs", type=int, default=200) + args, _ = parser.parse_known_args() + try: RCV_CONFIG = nni.get_next_parameter() #RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'} @@ -182,7 +186,7 @@ def test(epoch): prepare(RCV_CONFIG) acc = 0.0 best_acc = 0.0 - for epoch in range(start_epoch, start_epoch+200): + for epoch in range(start_epoch, start_epoch+args.epochs): train(epoch) acc, best_acc = test(epoch) nni.report_intermediate_result(acc) diff --git a/examples/trials/mnist-annotation/mnist.py 
b/examples/trials/mnist-annotation/mnist.py index 0c0b213cb6..70c993e521 100644 --- a/examples/trials/mnist-annotation/mnist.py +++ b/examples/trials/mnist-annotation/mnist.py @@ -1,5 +1,6 @@ """A deep MNIST classifier using convolutional layers.""" +import argparse import logging import math import tempfile @@ -180,7 +181,7 @@ def main(params): test_acc = 0.0 with tf.Session() as sess: sess.run(tf.global_variables_initializer()) - """@nni.variable(nni.choice(1, 4, 8, 16, 32), name=batch_size)""" + """@nni.variable(nni.choice(16, 32), name=batch_size)""" batch_size = params['batch_size'] for i in range(params['batch_num']): batch = mnist.train.next_batch(batch_size) @@ -210,29 +211,27 @@ def main(params): logger.debug('Final result is %g', test_acc) logger.debug('Send final result done.') - -def generate_default_params(): - ''' - Generate default parameters for mnist network. - ''' - params = { - 'data_dir': '/tmp/tensorflow/mnist/input_data', - 'dropout_rate': 0.5, - 'channel_1_num': 32, - 'channel_2_num': 64, - 'conv_size': 5, - 'pool_size': 2, - 'hidden_size': 1024, - 'learning_rate': 1e-4, - 'batch_num': 2000, - 'batch_size': 32} - return params - +def get_params(): + ''' Get parameters from command line ''' + parser = argparse.ArgumentParser() + parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") + parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") + parser.add_argument("--channel_1_num", type=int, default=32) + parser.add_argument("--channel_2_num", type=int, default=64) + parser.add_argument("--conv_size", type=int, default=5) + parser.add_argument("--pool_size", type=int, default=2) + parser.add_argument("--hidden_size", type=int, default=1024) + parser.add_argument("--learning_rate", type=float, default=1e-4) + parser.add_argument("--batch_num", type=int, default=2000) + parser.add_argument("--batch_size", type=int, default=32) + + args, _ = parser.parse_known_args() + return args if __name__ == '__main__': '''@nni.get_next_parameter()''' try: - main(generate_default_params()) + main(vars(get_params())) except Exception as exception: logger.exception(exception) raise diff --git a/examples/trials/mnist/mnist.py b/examples/trials/mnist/mnist.py index e6736480b1..17daa41e9b 100644 --- a/examples/trials/mnist/mnist.py +++ b/examples/trials/mnist/mnist.py @@ -1,5 +1,6 @@ """A deep MNIST classifier using convolutional layers.""" +import argparse import logging import math import tempfile @@ -148,7 +149,8 @@ def main(params): Main function, build mnist network, run and send result to NNI. ''' # Import data - mnist = input_data.read_data_sets(params['data_dir'], one_hot=True) + data_dir = params['data_dir'] + str(nni.get_sequence_id()) + mnist = input_data.read_data_sets(data_dir, one_hot=True) print('Mnist download data down.') logger.debug('Mnist download data down.') @@ -198,33 +200,30 @@ def main(params): logger.debug('Final result is %g', test_acc) logger.debug('Send final result done.') - -def generate_default_params(): - ''' - Generate default parameters for mnist network. 
- ''' - params = { - 'data_dir': '/tmp/tensorflow/mnist/input_data', - 'dropout_rate': 0.5, - 'channel_1_num': 32, - 'channel_2_num': 64, - 'conv_size': 5, - 'pool_size': 2, - 'hidden_size': 1024, - 'learning_rate': 1e-4, - 'batch_num': 2000, - 'batch_size': 32} - return params - +def get_params(): + ''' Get parameters from command line ''' + parser = argparse.ArgumentParser() + parser.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory") + parser.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate") + parser.add_argument("--channel_1_num", type=int, default=32) + parser.add_argument("--channel_2_num", type=int, default=64) + parser.add_argument("--conv_size", type=int, default=5) + parser.add_argument("--pool_size", type=int, default=2) + parser.add_argument("--hidden_size", type=int, default=1024) + parser.add_argument("--learning_rate", type=float, default=1e-4) + parser.add_argument("--batch_num", type=int, default=2000) + parser.add_argument("--batch_size", type=int, default=32) + + args, _ = parser.parse_known_args() + return args if __name__ == '__main__': try: # get parameters form tuner - RCV_PARAMS = nni.get_next_parameter() - logger.debug(RCV_PARAMS) - # run - params = generate_default_params() - params.update(RCV_PARAMS) + tuner_params = nni.get_next_parameter() + logger.debug(tuner_params) + params = vars(get_params()) + params.update(tuner_params) main(params) except Exception as exception: logger.exception(exception) diff --git a/test/config_test.py b/test/config_test.py new file mode 100644 index 0000000000..1100f1c595 --- /dev/null +++ b/test/config_test.py @@ -0,0 +1,127 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import argparse +import glob +import subprocess +import sys +import time +import traceback + +from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \ + parse_max_duration_time, get_succeeded_trial_num, print_stderr + +GREEN = '\33[32m' +RED = '\33[31m' +CLEAR = '\33[0m' + +STATUS_URL = 'http://localhost:8080/api/v1/nni/check-status' +TRIAL_JOBS_URL = 'http://localhost:8080/api/v1/nni/trial-jobs' + +def gen_new_config(config_file, training_service='local'): + ''' + Generates temporary config file for integration test, the file + should be deleted after testing. 
+ ''' + config = get_yml_content(config_file) + new_config_file = config_file + '.tmp' + + ts = get_yml_content('training_service.yml')[training_service] + print(config) + print(ts) + config.update(ts) + print(config) + dump_yml_content(new_config_file, config) + + return new_config_file, config + +def run_test(config_file, training_service, local_gpu=False): + '''run test per configuration file''' + + new_config_file, config = gen_new_config(config_file, training_service) + + if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0: + print('no gpu, skiping: ', config_file) + return + + try: + print('Testing %s...' % config_file) + proc = subprocess.run(['nnictl', 'create', '--config', new_config_file]) + assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode + + max_duration, max_trial_num = get_max_values(config_file) + sleep_interval = 3 + + for _ in range(0, max_duration+30, sleep_interval): + time.sleep(sleep_interval) + status = get_experiment_status(STATUS_URL) + if status == 'DONE': + num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL) + if training_service == 'local': + print_stderr(TRIAL_JOBS_URL) + assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num) + break + + assert status == 'DONE', 'Failed to finish in maxExecDuration' + finally: + if os.path.exists(new_config_file): + os.remove(new_config_file) + +def get_max_values(config_file): + '''Get maxExecDuration and maxTrialNum of experiment''' + experiment_config = get_yml_content(config_file) + return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] + +def run(args): + '''test all configuration files''' + if args.config is None: + config_files = glob.glob('./config_test/**/*.test.yml') + else: + config_files = args.config.split(',') + print(config_files) + + for config_file in config_files: + try: + # sleep 5 seconds here, to make sure previous stopped exp has enough time to exit to avoid port conflict + time.sleep(5) + run_test(config_file, args.ts, args.local_gpu) + print(GREEN + 'Test %s: TEST PASS' % (config_file) + CLEAR) + except Exception as error: + print(RED + 'Test %s: TEST FAIL' % (config_file) + CLEAR) + print('%r' % error) + traceback.print_exc() + raise error + finally: + subprocess.run(['nnictl', 'stop']) + +if __name__ == '__main__': + import tensorflow as tf + print('TF VERSION:', tf.__version__) + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, default=None) + parser.add_argument("--ts", type=str, choices=['local', 'remote', 'pai'], default='local') + parser.add_argument("--local_gpu", action='store_true') + parser.add_argument("--preinstall", action='store_true') + args = parser.parse_args() + + setup_experiment(args.preinstall) + + run(args) diff --git a/test/config_test/examples/cifar10-pytorch.test.yml b/test/config_test/examples/cifar10-pytorch.test.yml new file mode 100644 index 0000000000..94916e35aa --- /dev/null +++ b/test/config_test/examples/cifar10-pytorch.test.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 15m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./cifar10_search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/cifar10_pytorch + command: python3 main.py --epochs 2 + gpuNum: 
1 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/examples/cifar10_search_space.json b/test/config_test/examples/cifar10_search_space.json new file mode 100644 index 0000000000..ca1c0d2034 --- /dev/null +++ b/test/config_test/examples/cifar10_search_space.json @@ -0,0 +1,5 @@ +{ + "lr":{"_type":"choice", "_value":[0.1, 0.01, 0.001, 0.0001]}, + "optimizer":{"_type":"choice", "_value":["SGD", "Adadelta", "Adagrad", "Adam", "Adamax"]}, + "model":{"_type":"choice", "_value":["vgg", "resnet18"]} +} diff --git a/test/config_test/examples/mnist-annotation.test.yml b/test/config_test/examples/mnist-annotation.test.yml new file mode 100644 index 0000000000..4a3e5ffcc8 --- /dev/null +++ b/test/config_test/examples/mnist-annotation.test.yml @@ -0,0 +1,24 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/mnist-annotation + command: python3 mnist.py --batch_num 100 + gpuNum: 0 + +useAnnotation: true +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/examples/mnist-keras.test.yml b/test/config_test/examples/mnist-keras.test.yml new file mode 100644 index 0000000000..05b7138e0a --- /dev/null +++ b/test/config_test/examples/mnist-keras.test.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ../../../examples/trials/mnist-keras/search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/mnist-keras + command: python3 mnist-keras.py --num_train 200 --epochs 1 + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/examples/mnist.test.yml b/test/config_test/examples/mnist.test.yml new file mode 100644 index 0000000000..68640531b9 --- /dev/null +++ b/test/config_test/examples/mnist.test.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./mnist_search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/mnist + command: python3 mnist.py --batch_num 100 + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/examples/mnist_search_space.json b/test/config_test/examples/mnist_search_space.json new file mode 100644 index 0000000000..dd05405e27 --- /dev/null +++ b/test/config_test/examples/mnist_search_space.json @@ -0,0 +1,7 @@ +{ + "dropout_rate":{"_type":"uniform","_value":[0.5, 0.9]}, + "conv_size":{"_type":"choice","_value":[2,3,5,7]}, + "hidden_size":{"_type":"choice","_value":[124, 512, 1024]}, + "batch_size": {"_type":"choice", "_value": [16, 32]}, + "learning_rate":{"_type":"choice","_value":[0.0001, 0.001, 0.01, 0.1]} +} diff --git a/test/config_test/examples/sklearn-classification.test.yml b/test/config_test/examples/sklearn-classification.test.yml new 
file mode 100644 index 0000000000..e38f5f54e2 --- /dev/null +++ b/test/config_test/examples/sklearn-classification.test.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ../../../examples/trials/sklearn/classification/search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/sklearn/classification + command: python3 main.py + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/examples/sklearn-regression.test.yml b/test/config_test/examples/sklearn-regression.test.yml new file mode 100644 index 0000000000..3448e98c91 --- /dev/null +++ b/test/config_test/examples/sklearn-regression.test.yml @@ -0,0 +1,25 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ../../../examples/trials/sklearn/regression/search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize +assessor: + builtinAssessorName: Medianstop + classArgs: + optimize_mode: maximize +trial: + codeDir: ../../../examples/trials/sklearn/regression + command: python3 main.py + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/multi_phase/multi_phase.py b/test/config_test/multi_phase/multi_phase.py new file mode 100644 index 0000000000..39e77d8083 --- /dev/null +++ b/test/config_test/multi_phase/multi_phase.py @@ -0,0 +1,8 @@ +import time +import nni + +if __name__ == '__main__': + for i in range(5): + hyper_params = nni.get_next_parameter() + nni.report_final_result(0.1*i) + time.sleep(3) diff --git a/test/config_test/multi_phase/multi_phase.test.yml b/test/config_test/multi_phase/multi_phase.test.yml new file mode 100644 index 0000000000..b25c2a660b --- /dev/null +++ b/test/config_test/multi_phase/multi_phase.test.yml @@ -0,0 +1,22 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + codeDir: ../../../src/sdk/pynni/tests + classFileName: test_multi_phase_tuner.py + className: NaiveMultiPhaseTuner + +trial: + codeDir: . + command: python3 multi_phase.py + gpuNum: 0 + +useAnnotation: false +multiPhase: true +multiThread: false + +trainingServicePlatform: local diff --git a/test/config_test/multi_phase/search_space.json b/test/config_test/multi_phase/search_space.json new file mode 100644 index 0000000000..db2573aa52 --- /dev/null +++ b/test/config_test/multi_phase/search_space.json @@ -0,0 +1,7 @@ +{ + "test": + { + "_type" : "choice", + "_value" : [1, 100] + } +} \ No newline at end of file diff --git a/test/config_test/multi_thread/multi_thread.test.yml b/test/config_test/multi_thread/multi_thread.test.yml new file mode 100644 index 0000000000..6cb077db0e --- /dev/null +++ b/test/config_test/multi_thread/multi_thread.test.yml @@ -0,0 +1,22 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 5m +maxTrialNum: 2 +trialConcurrency: 2 +searchSpacePath: ./search_space.json + +tuner: + codeDir: . + classFileName: multi_thread_tuner.py + className: MultiThreadTuner + +trial: + codeDir: . 
+ command: python3 multi_thread_trial.py + gpuNum: 0 + +useAnnotation: false +multiPhase: false +multiThread: true + +trainingServicePlatform: local diff --git a/test/config_test/multi_thread/multi_thread_trial.py b/test/config_test/multi_thread/multi_thread_trial.py new file mode 100644 index 0000000000..1dcb8ac07d --- /dev/null +++ b/test/config_test/multi_thread/multi_thread_trial.py @@ -0,0 +1,7 @@ +import nni +import time + +if __name__ == '__main__': + nni.get_next_parameter() + time.sleep(3) + nni.report_final_result(0.5) diff --git a/test/config_test/multi_thread/multi_thread_tuner.py b/test/config_test/multi_thread/multi_thread_tuner.py new file mode 100644 index 0000000000..77fb3983be --- /dev/null +++ b/test/config_test/multi_thread/multi_thread_tuner.py @@ -0,0 +1,22 @@ +import time +from nni.tuner import Tuner + + +class MultiThreadTuner(Tuner): + def __init__(self): + self.parent_done = False + + def generate_parameters(self, parameter_id): + if parameter_id == 0: + return {'x': 0} + else: + while not self.parent_done: + time.sleep(2) + return {'x': 1} + + def receive_trial_result(self, parameter_id, parameters, value): + if parameter_id == 0: + self.parent_done = True + + def update_search_space(self, search_space): + pass diff --git a/test/config_test/multi_thread/search_space.json b/test/config_test/multi_thread/search_space.json new file mode 100644 index 0000000000..db2573aa52 --- /dev/null +++ b/test/config_test/multi_thread/search_space.json @@ -0,0 +1,7 @@ +{ + "test": + { + "_type" : "choice", + "_value" : [1, 100] + } +} \ No newline at end of file diff --git a/test/metrics_test.py b/test/metrics_test.py new file mode 100644 index 0000000000..8dfebfa7d5 --- /dev/null +++ b/test/metrics_test.py @@ -0,0 +1,98 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, +# to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import subprocess +import time +import traceback +import requests +import json + +from utils import get_experiment_status, get_yml_content, parse_max_duration_time, get_succeeded_trial_num + +GREEN = '\33[32m' +RED = '\33[31m' +CLEAR = '\33[0m' + +STATUS_URL = 'http://localhost:8080/api/v1/nni/check-status' +TRIAL_JOBS_URL = 'http://localhost:8080/api/v1/nni/trial-jobs' +METRICS_URL = 'http://localhost:8080/api/v1/nni/metric-data' + +def run_test(): + '''run metrics test''' + config_file = 'metrics_test/metrics.test.yml' + + print('Testing %s...' 
% config_file) + proc = subprocess.run(['nnictl', 'create', '--config', config_file]) + assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode + + max_duration, max_trial_num = get_max_values(config_file) + sleep_interval = 3 + + for _ in range(0, max_duration, sleep_interval): + time.sleep(sleep_interval) + status = get_experiment_status(STATUS_URL) + #print('experiment status:', status) + if status == 'DONE': + num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL) + assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num) + check_metrics() + break + + assert status == 'DONE', 'Failed to finish in maxExecDuration' + +def check_metrics(): + with open('metrics_test/expected_metrics.json', 'r') as f: + expected_metrics = json.load(f) + print(expected_metrics) + metrics = requests.get(METRICS_URL).json() + intermediate_result, final_result = get_metric_results(metrics) + assert len(final_result) == 1, 'there should be 1 final result' + assert final_result[0] == expected_metrics['final_result'] + assert set(intermediate_result) == set(expected_metrics['intermediate_result']) + +def get_metric_results(metrics): + intermediate_result = [] + final_result = [] + for metric in metrics: + if metric['type'] == 'PERIODICAL': + intermediate_result.append(metric['data']) + elif metric['type'] == 'FINAL': + final_result.append(metric['data']) + print(intermediate_result, final_result) + + return [round(float(x),6) for x in intermediate_result], [round(float(x), 6) for x in final_result] + +def get_max_values(config_file): + experiment_config = get_yml_content(config_file) + return parse_max_duration_time(experiment_config['maxExecDuration']), experiment_config['maxTrialNum'] + +if __name__ == '__main__': + try: + # sleep 5 seconds here, to make sure previous stopped exp has enough time to exit to avoid port conflict + time.sleep(5) + run_test() + print(GREEN + 'TEST PASS' + CLEAR) + except Exception as error: + print(RED + 'TEST FAIL' + CLEAR) + print('%r' % error) + traceback.print_exc() + raise error + finally: + subprocess.run(['nnictl', 'stop']) diff --git a/test/metrics_test/expected_metrics.json b/test/metrics_test/expected_metrics.json new file mode 100644 index 0000000000..dfa01d7480 --- /dev/null +++ b/test/metrics_test/expected_metrics.json @@ -0,0 +1,4 @@ +{ + "intermediate_result": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], + "final_result": 1.0 +} \ No newline at end of file diff --git a/test/metrics_test/metrics.test.yml b/test/metrics_test/metrics.test.yml new file mode 100644 index 0000000000..4b865c9f9e --- /dev/null +++ b/test/metrics_test/metrics.test.yml @@ -0,0 +1,22 @@ +authorName: nni +experimentName: default_test +maxExecDuration: 3m +maxTrialNum: 1 +trialConcurrency: 1 +searchSpacePath: ./search_space.json + +tuner: + builtinTunerName: Random + classArgs: + optimize_mode: maximize + +trial: + codeDir: . 
+  command: python3 trial.py
+  gpuNum: 0
+
+useAnnotation: false
+multiPhase: false
+multiThread: false
+
+trainingServicePlatform: local
diff --git a/test/metrics_test/search_space.json b/test/metrics_test/search_space.json
new file mode 100644
index 0000000000..db2573aa52
--- /dev/null
+++ b/test/metrics_test/search_space.json
@@ -0,0 +1,7 @@
+{
+    "test":
+    {
+        "_type" : "choice",
+        "_value" : [1, 100]
+    }
+}
\ No newline at end of file
diff --git a/test/metrics_test/trial.py b/test/metrics_test/trial.py
new file mode 100644
index 0000000000..9731f5c560
--- /dev/null
+++ b/test/metrics_test/trial.py
@@ -0,0 +1,10 @@
+import time
+import nni
+
+if __name__ == '__main__':
+    hyper_params = nni.get_next_parameter()
+
+    for i in range(10):
+        nni.report_intermediate_result(0.1*(i+1))
+        time.sleep(2)
+    nni.report_final_result(1.0)
diff --git a/test/naive_test.py b/test/naive_test.py
index f238c7be9e..062b1d98f0 100644
--- a/test/naive_test.py
+++ b/test/naive_test.py
@@ -24,7 +24,7 @@
 import time
 import traceback
 
-from utils import check_experiment_status, fetch_nni_log_path, read_last_line, remove_files, setup_experiment
+from utils import is_experiment_done, fetch_nni_log_path, read_last_line, remove_files, setup_experiment
 
 GREEN = '\33[32m'
 RED = '\33[31m'
@@ -51,7 +51,7 @@ def run():
     tuner_status = read_last_line('naive_test/tuner_result.txt')
     assessor_status = read_last_line('naive_test/assessor_result.txt')
 
-    experiment_status = check_experiment_status(nnimanager_log_path)
+    experiment_status = is_experiment_done(nnimanager_log_path)
 
     assert tuner_status != 'ERROR', 'Tuner exited with error'
     assert assessor_status != 'ERROR', 'Assessor exited with error'
diff --git a/test/nnictl b/test/nnictl
deleted file mode 100644
index b8479d6b6e..0000000000
--- a/test/nnictl
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-python3 -m nni_cmd.nnictl $@
diff --git a/test/nnimanager b/test/nnimanager
deleted file mode 100644
index d2e81a8a65..0000000000
--- a/test/nnimanager
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-cd ../../src/nni_manager && node dist/main.js $@
diff --git a/test/training_service.yml b/test/training_service.yml
new file mode 100644
index 0000000000..3965c38a91
--- /dev/null
+++ b/test/training_service.yml
@@ -0,0 +1,24 @@
+local:
+  trainingServicePlatform: local
+
+remote:
+  trainingServicePlatform: remote
+  machineList:
+    - ip:
+      port:
+      username:
+      passwd:
+
+pai:
+  trainingServicePlatform: pai
+  paiConfig:
+    userName:
+    passWord:
+    host:
+  trial:
+    gpuNum:
+    cpuNum:
+    memoryMB:
+    image: msranni/latest
+    dataDir:
+    outputDir:
diff --git a/test/sdk_test.py b/test/tuner_test.py
similarity index 93%
rename from test/sdk_test.py
rename to test/tuner_test.py
index f83f1c9ed9..4a6d4527d0 100644
--- a/test/sdk_test.py
+++ b/test/tuner_test.py
@@ -23,7 +23,7 @@
 import time
 import traceback
 
-from utils import get_yml_content, dump_yml_content, setup_experiment, fetch_nni_log_path, check_experiment_status
+from utils import get_yml_content, dump_yml_content, setup_experiment, fetch_nni_log_path, is_experiment_done
 
 GREEN = '\33[32m'
 RED = '\33[31m'
@@ -36,7 +36,7 @@ def switch(dispatch_type, dispatch_name):
     '''Change dispatch in config.yml'''
-    config_path = 'sdk_test/local.yml'
+    config_path = 'tuner_test/local.yml'
     experiment_config = get_yml_content(config_path)
     if dispatch_name in ['GridSearch', 'BatchTuner']:
         experiment_config[dispatch_type.lower()] = {
@@ -56,7 +56,7 @@ def test_builtin_dispatcher(dispatch_type, dispatch_name):
     switch(dispatch_type, dispatch_name)

     print('Testing %s...' % dispatch_name)
-    proc = subprocess.run(['nnictl', 'create', '--config', 'sdk_test/local.yml'])
+    proc = subprocess.run(['nnictl', 'create', '--config', 'tuner_test/local.yml'])
     assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

     nnimanager_log_path = fetch_nni_log_path(EXPERIMENT_URL)

     for _ in range(20):
         time.sleep(3)
         # check if experiment is done
-        experiment_status = check_experiment_status(nnimanager_log_path)
+        experiment_status = is_experiment_done(nnimanager_log_path)
         if experiment_status:
             break
diff --git a/test/sdk_test/local.yml b/test/tuner_test/local.yml
similarity index 100%
rename from test/sdk_test/local.yml
rename to test/tuner_test/local.yml
diff --git a/test/sdk_test/naive_trial.py b/test/tuner_test/naive_trial.py
similarity index 100%
rename from test/sdk_test/naive_trial.py
rename to test/tuner_test/naive_trial.py
diff --git a/test/sdk_test/search_space.json b/test/tuner_test/search_space.json
similarity index 100%
rename from test/sdk_test/search_space.json
rename to test/tuner_test/search_space.json
diff --git a/test/utils.py b/test/utils.py
index e430c68cf3..d7d28d451f 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -73,10 +73,38 @@ def fetch_nni_log_path(experiment_url):

     return nnimanager_log_path

-def check_experiment_status(nnimanager_log_path):
+def is_experiment_done(nnimanager_log_path):
     '''check if the experiment is done successfully'''
     assert os.path.exists(nnimanager_log_path), 'Experiment starts failed'
     cmds = ['cat', nnimanager_log_path, '|', 'grep', EXPERIMENT_DONE_SIGNAL]
     completed_process = subprocess.run(' '.join(cmds), shell=True)

     return completed_process.returncode == 0
+
+def get_experiment_status(status_url):
+    nni_status = requests.get(status_url).json()
+    #print(nni_status)
+    return nni_status['status']
+
+def get_succeeded_trial_num(trial_jobs_url):
+    trial_jobs = requests.get(trial_jobs_url).json()
+    print(trial_jobs)
+    num_succeed = 0
+    for trial_job in trial_jobs:
+        if trial_job['status'] in ['SUCCEEDED', 'EARLY_STOPPED']:
+            num_succeed += 1
+    print('num_succeed:', num_succeed)
+    return num_succeed
+
+def print_stderr(trial_jobs_url):
+    trial_jobs = requests.get(trial_jobs_url).json()
+    for trial_job in trial_jobs:
+        if trial_job['status'] == 'FAILED':
+            stderr_path = trial_job['stderrPath'].split(':')[-1]
+            subprocess.run(['cat', stderr_path])
+
+def parse_max_duration_time(max_exec_duration):
+    unit = max_exec_duration[-1]
+    time = max_exec_duration[:-1]
+    units_dict = {'s':1, 'm':60, 'h':3600, 'd':86400}
+    return int(time) * units_dict[unit]
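Note on the refactored layout: each *.test.yml under test/config_test/ is written without platform fields, and config_test.py's gen_new_config() merges in the block selected from test/training_service.yml before handing the merged file to nnictl create. The following is a minimal standalone sketch of that merge step, assuming PyYAML is installed and the working directory is test/; merge_training_service is an illustrative name, not a function the patch defines.

# Sketch only: mirrors the merge performed by gen_new_config() in test/config_test.py.
# Assumes PyYAML is available and the script is run from the test/ directory.
import yaml

def merge_training_service(config_file, training_service='local'):
    # Load the platform-agnostic test template.
    with open(config_file) as f:
        config = yaml.safe_load(f)
    # Pick the platform block (local / remote / pai) from training_service.yml.
    with open('training_service.yml') as f:
        ts = yaml.safe_load(f)[training_service]
    # Platform fields are merged on top of the template.
    config.update(ts)
    # Write a temporary config next to the template; config_test.py removes it after the run.
    new_config_file = config_file + '.tmp'
    with open(new_config_file, 'w') as f:
        yaml.safe_dump(config, f, default_flow_style=False)
    return new_config_file

if __name__ == '__main__':
    # e.g. produce config_test/examples/mnist.test.yml.tmp for the local platform
    print(merge_training_service('config_test/examples/mnist.test.yml', 'local'))

Because only training_service.yml carries platform-specific settings (machineList for remote, paiConfig for pai), the same test templates can drive local, remote, and pai runs simply by changing the --ts argument of config_test.py.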