Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
Integration test refactoring (#21) (#616)
Browse files Browse the repository at this point in the history
* Integration test refactoring (#21)

* Refactoring integration tests

* test metrics

* update azure pipeline

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* updates

* update trigger
  • Loading branch information
chicm-ms authored Jan 17, 2019
1 parent b8e4918 commit 3fcc5e9
Show file tree
Hide file tree
Showing 34 changed files with 658 additions and 69 deletions.
39 changes: 27 additions & 12 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,41 @@
trigger:
- master
- dev-remote-ci
- dev-it

jobs:

- job: 'Ubuntu_16_04'
pool:
vmImage: 'Ubuntu 16.04'
strategy:
matrix:
Python36:
PYTHON_VERSION: '3.6'
pool: 'NNI CI GPU'

steps:
- script: python3 -m pip install --upgrade pip setuptools
displayName: 'Install python tools'
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
python3 -m pip install sklearn --user
python3 -m pip install torchvision --user
python3 -m pip install keras --user
python3 -m pip install tensorflow==1.9.0 --user
displayName: 'Install dependencies for integration tests'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts local --config config_test/examples/mnist.test.yml
displayName: 'Examples on local machine tests'
- script: |
cd test
source unittest.sh
displayName: 'Unit test'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 naive_test.py
displayName: 'Integration tests'
displayName: 'Naive test'
- script: |
cd test
PATH=$HOME/.local/bin:$PATH python3 sdk_test.py
displayName: 'Built-in dispatcher tests'
PATH=$HOME/.local/bin:$PATH python3 tuner_test.py
displayName: 'Built-in tuners / assessors tests'
- job: 'macOS_10_13'
pool:
Expand All @@ -45,15 +51,24 @@ jobs:
- script: |
source install.sh
displayName: 'Install nni toolkit via source code'
- script: |
python3 -m pip install sklearn --user
python3 -m pip install torchvision --user
python3 -m pip install keras --user
displayName: 'Install dependencies for integration tests'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH && source unittest.sh
displayName: 'Unit test'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 naive_test.py
displayName: 'Integration tests'
displayName: 'Naive test'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 tuner_test.py
displayName: 'Built-in tuners / assessors tests'
- script: |
cd test
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 sdk_test.py
displayName: 'Built-in dispatcher tests'
PATH=$HOME/Library/Python/3.7/bin:$PATH python3 config_test.py --ts local
displayName: 'Examples on local machine tests'
8 changes: 6 additions & 2 deletions examples/trials/cifar10_pytorch/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''Train CIFAR10 with PyTorch.'''
from __future__ import print_function

import argparse
import torch
import torch.nn as nn
import torch.optim as optim
Expand Down Expand Up @@ -174,6 +174,10 @@ def test(epoch):


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200)
args, _ = parser.parse_known_args()

try:
RCV_CONFIG = nni.get_next_parameter()
#RCV_CONFIG = {'lr': 0.1, 'optimizer': 'Adam', 'model':'senet18'}
Expand All @@ -182,7 +186,7 @@ def test(epoch):
prepare(RCV_CONFIG)
acc = 0.0
best_acc = 0.0
for epoch in range(start_epoch, start_epoch+200):
for epoch in range(start_epoch, start_epoch+args.epochs):
train(epoch)
acc, best_acc = test(epoch)
nni.report_intermediate_result(acc)
Expand Down
39 changes: 19 additions & 20 deletions examples/trials/mnist-annotation/mnist.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""A deep MNIST classifier using convolutional layers."""

import argparse
import logging
import math
import tempfile
Expand Down Expand Up @@ -180,7 +181,7 @@ def main(params):
test_acc = 0.0
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
"""@nni.variable(nni.choice(1, 4, 8, 16, 32), name=batch_size)"""
"""@nni.variable(nni.choice(16, 32), name=batch_size)"""
batch_size = params['batch_size']
for i in range(params['batch_num']):
batch = mnist.train.next_batch(batch_size)
Expand Down Expand Up @@ -210,29 +211,27 @@ def main(params):
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')


def generate_default_params():
    '''
    Return a freshly built dict holding the default settings
    of the mnist network.
    '''
    default_items = [
        ('data_dir', '/tmp/tensorflow/mnist/input_data'),
        ('dropout_rate', 0.5),
        ('channel_1_num', 32),
        ('channel_2_num', 64),
        ('conv_size', 5),
        ('pool_size', 2),
        ('hidden_size', 1024),
        ('learning_rate', 1e-4),
        ('batch_num', 2000),
        ('batch_size', 32),
    ]
    # a new dict is created on every call, so callers may mutate the result
    return dict(default_items)

def get_params():
    '''
    Read the trial settings from the command line.

    Every option has a default and unrecognised arguments are ignored
    (parse_known_args is used), so the script also runs without any
    arguments. Returns the argparse namespace with the parsed values.
    '''
    option_table = (
        ("--data_dir", dict(type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")),
        ("--dropout_rate", dict(type=float, default=0.5, help="dropout rate")),
        ("--channel_1_num", dict(type=int, default=32)),
        ("--channel_2_num", dict(type=int, default=64)),
        ("--conv_size", dict(type=int, default=5)),
        ("--pool_size", dict(type=int, default=2)),
        ("--hidden_size", dict(type=int, default=1024)),
        ("--learning_rate", dict(type=float, default=1e-4)),
        ("--batch_num", dict(type=int, default=2000)),
        ("--batch_size", dict(type=int, default=32)),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    # the second element holds the arguments argparse did not recognise
    known_args, _ignored = cli.parse_known_args()
    return known_args

if __name__ == '__main__':
'''@nni.get_next_parameter()'''
try:
main(generate_default_params())
main(vars(get_params()))
except Exception as exception:
logger.exception(exception)
raise
47 changes: 23 additions & 24 deletions examples/trials/mnist/mnist.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""A deep MNIST classifier using convolutional layers."""

import argparse
import logging
import math
import tempfile
Expand Down Expand Up @@ -148,7 +149,8 @@ def main(params):
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist = input_data.read_data_sets(params['data_dir'], one_hot=True)
data_dir = params['data_dir'] + str(nni.get_sequence_id())
mnist = input_data.read_data_sets(data_dir, one_hot=True)
print('Mnist download data down.')
logger.debug('Mnist download data down.')

Expand Down Expand Up @@ -198,33 +200,30 @@ def main(params):
logger.debug('Final result is %g', test_acc)
logger.debug('Send final result done.')


def generate_default_params():
    '''
    Build and return the default parameter dict for the mnist network.
    '''
    defaults = {}
    # input location
    defaults['data_dir'] = '/tmp/tensorflow/mnist/input_data'
    # network settings
    defaults['dropout_rate'] = 0.5
    defaults['channel_1_num'] = 32
    defaults['channel_2_num'] = 64
    defaults['conv_size'] = 5
    defaults['pool_size'] = 2
    defaults['hidden_size'] = 1024
    # training settings
    defaults['learning_rate'] = 1e-4
    defaults['batch_num'] = 2000
    defaults['batch_size'] = 32
    return defaults

def get_params():
    '''
    Parse the command-line options of the mnist trial.

    All options carry defaults; arguments that are not declared here are
    tolerated because parse_known_args is used. Returns the argparse
    namespace with the parsed values.
    '''
    cli = argparse.ArgumentParser()

    # the two documented options
    cli.add_argument("--data_dir", type=str, default='/tmp/tensorflow/mnist/input_data', help="data directory")
    cli.add_argument("--dropout_rate", type=float, default=0.5, help="dropout rate")

    # integer-valued options, registered in the same order as before
    int_defaults = (
        ("--channel_1_num", 32),
        ("--channel_2_num", 64),
        ("--conv_size", 5),
        ("--pool_size", 2),
        ("--hidden_size", 1024),
    )
    for flag, default in int_defaults:
        cli.add_argument(flag, type=int, default=default)

    cli.add_argument("--learning_rate", type=float, default=1e-4)
    cli.add_argument("--batch_num", type=int, default=2000)
    cli.add_argument("--batch_size", type=int, default=32)

    parsed, _extras = cli.parse_known_args()
    return parsed

if __name__ == '__main__':
try:
# get parameters from tuner
RCV_PARAMS = nni.get_next_parameter()
logger.debug(RCV_PARAMS)
# run
params = generate_default_params()
params.update(RCV_PARAMS)
tuner_params = nni.get_next_parameter()
logger.debug(tuner_params)
params = vars(get_params())
params.update(tuner_params)
main(params)
except Exception as exception:
logger.exception(exception)
Expand Down
127 changes: 127 additions & 0 deletions test/config_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
import argparse
import glob
import subprocess
import sys
import time
import traceback

from utils import setup_experiment, get_experiment_status, get_yml_content, dump_yml_content, \
parse_max_duration_time, get_succeeded_trial_num, print_stderr

# ANSI escape sequences ('\33' is the ESC character) used to colour the
# PASS / FAIL lines printed by run().
GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'  # resets the colour after a coloured message

# HTTP endpoints on localhost:8080 that run_test() queries for the
# experiment status and the trial jobs.
STATUS_URL = 'http://localhost:8080/api/v1/nni/check-status'
TRIAL_JOBS_URL = 'http://localhost:8080/api/v1/nni/trial-jobs'

def gen_new_config(config_file, training_service='local'):
    '''
    Create the temporary config file used by one integration test.

    The content of `config_file` is overlaid with the `training_service`
    section of training_service.yml and dumped to `<config_file>.tmp`.
    The caller has to delete that file once the test is over.

    Returns a tuple: (path of the temporary file, merged config).
    '''
    merged = get_yml_content(config_file)
    tmp_path = config_file + '.tmp'

    service_section = get_yml_content('training_service.yml')[training_service]
    # print both inputs and then the merged result to ease debugging in CI logs
    print(merged)
    print(service_section)
    merged.update(service_section)
    print(merged)

    dump_yml_content(tmp_path, merged)
    return tmp_path, merged

def run_test(config_file, training_service, local_gpu=False):
    '''
    Run the integration test described by one configuration file.

    A temporary config is generated with gen_new_config, the experiment is
    started through `nnictl create`, and its status is polled every
    `sleep_interval` seconds until it reports DONE or the polling budget
    (maxExecDuration plus 30, presumably seconds) is used up. When the
    experiment is DONE, the number of succeeded trials must equal
    maxTrialNum.

    Args:
        config_file: path of the *.test.yml file to run.
        training_service: key of the section in training_service.yml to
            merge into the config, e.g. 'local'.
        local_gpu: set to True when the local machine has a GPU; otherwise
            local tests whose trial requests gpuNum > 0 are skipped.

    Raises:
        AssertionError: if `nnictl create` fails, the experiment does not
            reach DONE in time, or not all trials succeeded.
    '''
    new_config_file, config = gen_new_config(config_file, training_service)

    try:
        # The skip check lives inside the try block so that the temporary
        # config written by gen_new_config is removed on this path as well.
        if training_service == 'local' and not local_gpu and config['trial']['gpuNum'] > 0:
            print('no gpu, skiping: ', config_file)
            return

        print('Testing %s...' % config_file)
        proc = subprocess.run(['nnictl', 'create', '--config', new_config_file])
        assert proc.returncode == 0, '`nnictl create` failed with code %d' % proc.returncode

        max_duration, max_trial_num = get_max_values(config_file)
        sleep_interval = 3

        # Defined up front so the final assert reports a clean failure even
        # if the polling loop body never runs.
        status = None
        for _ in range(0, max_duration+30, sleep_interval):
            time.sleep(sleep_interval)
            status = get_experiment_status(STATUS_URL)
            if status == 'DONE':
                num_succeeded = get_succeeded_trial_num(TRIAL_JOBS_URL)
                if training_service == 'local':
                    print_stderr(TRIAL_JOBS_URL)
                assert num_succeeded == max_trial_num, 'only %d succeeded trial jobs, there should be %d' % (num_succeeded, max_trial_num)
                break

        assert status == 'DONE', 'Failed to finish in maxExecDuration'
    finally:
        # always clean up the generated temporary config file
        if os.path.exists(new_config_file):
            os.remove(new_config_file)

def get_max_values(config_file):
    '''
    Read the experiment limits from `config_file`.

    Returns a tuple: (maxExecDuration converted by parse_max_duration_time,
    maxTrialNum as stored in the file).
    '''
    limits = get_yml_content(config_file)
    max_duration = parse_max_duration_time(limits['maxExecDuration'])
    max_trial_num = limits['maxTrialNum']
    return max_duration, max_trial_num

def run(args):
    '''
    Run every selected configuration file as an integration test.

    With no --config given, all ./config_test/**/*.test.yml files are used;
    otherwise --config is taken as a comma-separated list of files. The
    first failing test is reported and its exception re-raised; `nnictl stop`
    is issued after every test, whether it passed or failed.
    '''
    config_files = (
        glob.glob('./config_test/**/*.test.yml')
        if args.config is None
        else args.config.split(',')
    )
    print(config_files)

    for config_file in config_files:
        try:
            # wait 5 seconds so that the previously stopped experiment has
            # enough time to exit; this avoids a port conflict
            time.sleep(5)
            run_test(config_file, args.ts, args.local_gpu)
            passed_msg = 'Test %s: TEST PASS' % (config_file)
            print(GREEN + passed_msg + CLEAR)
        except Exception as error:
            failed_msg = 'Test %s: TEST FAIL' % (config_file)
            print(RED + failed_msg + CLEAR)
            print('%r' % error)
            traceback.print_exc()
            raise error
        finally:
            subprocess.run(['nnictl', 'stop'])

if __name__ == '__main__':
    # tensorflow is used here only to print its version into the test log
    import tensorflow as tf
    print('TF VERSION:', tf.__version__)

    # command-line interface of the test driver
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", type=str, default=None)
    cli.add_argument("--ts", type=str, choices=['local', 'remote', 'pai'], default='local')
    for switch in ("--local_gpu", "--preinstall"):
        cli.add_argument(switch, action='store_true')
    args = cli.parse_args()

    # prepare the environment first, then run the selected tests
    setup_experiment(args.preinstall)
    run(args)
25 changes: 25 additions & 0 deletions test/config_test/examples/cifar10-pytorch.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Integration-test experiment config for the cifar10_pytorch example trial.
authorName: nni
experimentName: default_test
maxExecDuration: 15m
maxTrialNum: 2
trialConcurrency: 2
searchSpacePath: ./cifar10_search_space.json

tuner:
builtinTunerName: Random
classArgs:
optimize_mode: maximize
assessor:
builtinAssessorName: Medianstop
classArgs:
optimize_mode: maximize
trial:
codeDir: ../../../examples/trials/cifar10_pytorch
# --epochs 2 overrides main.py's default of 200 epochs to keep each trial short
command: python3 main.py --epochs 2
# gpuNum > 0: config_test.py skips this test on the local training service unless --local_gpu is passed
gpuNum: 1

useAnnotation: false
multiPhase: false
multiThread: false

trainingServicePlatform: local
Loading

0 comments on commit 3fcc5e9

Please sign in to comment.