diff --git a/docs/StartExperiment.md b/docs/StartExperiment.md new file mode 100644 index 0000000000..30f736ff15 --- /dev/null +++ b/docs/StartExperiment.md @@ -0,0 +1,33 @@ +How to start an experiment +=== +## 1.Introduction +There are a few steps to start a new nni experiment; here is the process. + +## 2.Details +### 2.1 Check environment +The first step to start an experiment is to check whether the environment is ready. nnictl will check whether an old experiment is running or the port of the restful server is occupied. +NNICTL will also validate the content of the config yaml file, to ensure the experiment config is in the correct format. + +### 2.2 Start restful server +After checking the environment, nnictl will start a restful server process to manage the nni experiment; the default port is 51188. + +### 2.3 Check restful server +Before the next steps, nnictl will check whether the restful server started successfully; otherwise the starting process will stop and show an error message. + +### 2.4 Set experiment config +NNICTL needs to set the experiment config before starting an experiment; the experiment config includes the config values in the config yaml file. + +### 2.5 Check experiment config +NNICTL will ensure the request to set the config is successfully executed. + +### 2.6 Start Web UI +NNICTL will start a Web UI process to show Web UI information; the default port of the Web UI is 8080. + +### 2.7 Check Web UI +If the Web UI is not successfully started, nnictl will give a warning and will continue to start the experiment. + +### 2.8 Start Experiment +This is the most important step of starting an nni experiment: nnictl will call the restful server process to set up an experiment. + +### 2.9 Check experiment +After starting the experiment, nnictl will check whether the experiment is correctly created, and show more information about this experiment to users. 
\ No newline at end of file diff --git a/docs/img/experiment_process.jpg b/docs/img/experiment_process.jpg new file mode 100644 index 0000000000..141e41cad9 Binary files /dev/null and b/docs/img/experiment_process.jpg differ diff --git a/examples/trials/auto-gbdt/config.yml b/examples/trials/auto-gbdt/config.yml index 8a2569d1a8..e6f3b963ac 100644 --- a/examples/trials/auto-gbdt/config.yml +++ b/examples/trials/auto-gbdt/config.yml @@ -3,7 +3,7 @@ experimentName: example_auto-gbdt trialConcurrency: 1 maxExecDuration: 10h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local searchSpacePath: search_space.json #choice: true, false diff --git a/examples/trials/auto-gbdt/config_pai.yml b/examples/trials/auto-gbdt/config_pai.yml new file mode 100644 index 0000000000..26577cf83a --- /dev/null +++ b/examples/trials/auto-gbdt/config_pai.yml @@ -0,0 +1,36 @@ +authorName: default +experimentName: example_auto-gbdt +trialConcurrency: 1 +maxExecDuration: 10h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: minimize +trial: + command: python3 main.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/ga_squad/config.yml b/examples/trials/ga_squad/config.yml index a0b1480992..c6fec5bcbc 100644 --- a/examples/trials/ga_squad/config.yml +++ b/examples/trials/ga_squad/config.yml @@ -3,7 +3,7 @@ experimentName: example_ga_squad trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local #choice: true, false useAnnotation: false diff --git a/examples/trials/ga_squad/config_pai.yml b/examples/trials/ga_squad/config_pai.yml new file mode 100644 index 0000000000..56c2d33069 --- /dev/null +++ b/examples/trials/ga_squad/config_pai.yml @@ -0,0 +1,34 @@ +authorName: default +experimentName: example_ga_squad +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +#choice: true, false +useAnnotation: false +tuner: + codeDir: ../tuners/ga_customer_tuner + classFileName: customer_tuner.py + className: CustomerTuner + classArgs: + optimize_mode: maximize +trial: + command: python3 trial.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-annotation/config.yml b/examples/trials/mnist-annotation/config.yml index 84c31166a8..b0555ad3a2 100644 --- a/examples/trials/mnist-annotation/config.yml +++ b/examples/trials/mnist-annotation/config.yml @@ -3,7 +3,7 @@ experimentName: example_mnist trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local #choice: true, false useAnnotation: true diff --git a/examples/trials/mnist-annotation/config_pai.yml b/examples/trials/mnist-annotation/config_pai.yml new file mode 100644 index 0000000000..edb9e62384 --- /dev/null +++ b/examples/trials/mnist-annotation/config_pai.yml @@ -0,0 +1,35 @@ +authorName: default +experimentName: example_mnist +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +#choice: true, false +useAnnotation: true +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-batch-tune-keras/config.yml b/examples/trials/mnist-batch-tune-keras/config.yml index 1bb85085c5..e0722f9117 100644 --- a/examples/trials/mnist-batch-tune-keras/config.yml +++ b/examples/trials/mnist-batch-tune-keras/config.yml @@ -3,7 +3,7 @@ experimentName: example_mnist-keras trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local searchSpacePath: search_space.json #choice: true, false diff --git a/examples/trials/mnist-batch-tune-keras/config_pai.yml b/examples/trials/mnist-batch-tune-keras/config_pai.yml new file mode 100644 index 0000000000..183c220e2d --- /dev/null +++ b/examples/trials/mnist-batch-tune-keras/config_pai.yml @@ -0,0 +1,36 @@ +authorName: default +experimentName: example_mnist-keras +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, BatchTuner + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: BatchTuner + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist-keras.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-keras/config.yml b/examples/trials/mnist-keras/config.yml index c1792e2a65..6ea1c2a367 100644 --- a/examples/trials/mnist-keras/config.yml +++ b/examples/trials/mnist-keras/config.yml @@ -3,7 +3,7 @@ experimentName: example_mnist-keras trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local searchSpacePath: search_space.json #choice: true, false diff --git a/examples/trials/mnist-keras/config_pai.yml b/examples/trials/mnist-keras/config_pai.yml new file mode 100644 index 0000000000..bbf8136144 --- /dev/null +++ b/examples/trials/mnist-keras/config_pai.yml @@ -0,0 +1,36 @@ +authorName: default +experimentName: example_mnist-keras +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist-keras.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist-smartparam/config.yml b/examples/trials/mnist-smartparam/config.yml index a69e801ed8..912eabb24e 100644 --- a/examples/trials/mnist-smartparam/config.yml +++ b/examples/trials/mnist-smartparam/config.yml @@ -3,7 +3,7 @@ experimentName: example_mnist-smartparam trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local #choice: true, false useAnnotation: true diff --git a/examples/trials/mnist-smartparam/config_pai.yml b/examples/trials/mnist-smartparam/config_pai.yml new file mode 100644 index 0000000000..4b5a088d11 --- /dev/null +++ b/examples/trials/mnist-smartparam/config_pai.yml @@ -0,0 +1,35 @@ +authorName: default +experimentName: example_mnist-smartparam +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +#choice: true, false +useAnnotation: true +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/mnist/config.yml b/examples/trials/mnist/config.yml index 5eab536228..2f6141fa45 100644 --- a/examples/trials/mnist/config.yml +++ b/examples/trials/mnist/config.yml @@ -3,7 +3,7 @@ experimentName: example_mnist trialConcurrency: 1 maxExecDuration: 1h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local searchSpacePath: search_space.json #choice: true, false diff --git a/examples/trials/mnist/config_pai.yml b/examples/trials/mnist/config_pai.yml new file mode 100644 index 0000000000..a20fdce40b --- /dev/null +++ b/examples/trials/mnist/config_pai.yml @@ -0,0 +1,36 @@ +authorName: default +experimentName: example_mnist +trialConcurrency: 1 +maxExecDuration: 1h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 mnist.py + codeDir: . 
+ gpuNum: 0 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 \ No newline at end of file diff --git a/examples/trials/pytorch_cifar10/config.yml b/examples/trials/pytorch_cifar10/config.yml index 655dccd95f..1d6f847805 100644 --- a/examples/trials/pytorch_cifar10/config.yml +++ b/examples/trials/pytorch_cifar10/config.yml @@ -3,7 +3,7 @@ experimentName: example_pytorch_cifar10 trialConcurrency: 1 maxExecDuration: 100h maxTrialNum: 10 -#choice: local, remote +#choice: local, remote, pai trainingServicePlatform: local searchSpacePath: search_space.json #choice: true, false diff --git a/examples/trials/pytorch_cifar10/config_pai.yml b/examples/trials/pytorch_cifar10/config_pai.yml new file mode 100644 index 0000000000..783285f815 --- /dev/null +++ b/examples/trials/pytorch_cifar10/config_pai.yml @@ -0,0 +1,36 @@ +authorName: default +experimentName: example_pytorch_cifar10 +trialConcurrency: 1 +maxExecDuration: 100h +maxTrialNum: 10 +#choice: local, remote, pai +trainingServicePlatform: pai +searchSpacePath: search_space.json +#choice: true, false +useAnnotation: false +tuner: + #choice: TPE, Random, Anneal, Evolution, + #SMAC (SMAC should be installed through nnictl) + builtinTunerName: TPE + classArgs: + #choice: maximize, minimize + optimize_mode: maximize +trial: + command: python3 main.py + codeDir: . 
+ gpuNum: 1 + cpuNum: 1 + memoryMB: 8196 + #The docker image to run nni job on pai + image: openpai/pai.example.tensorflow + #The hdfs directory to store data on pai, format 'hdfs://host:port/directory' + hdfsDataDir: hdfs://10.10.10.10:9000/username/nni + #The hdfs directory to store output data generated by nni, format 'hdfs://host:port/directory' + hdfsOutputDir: hdfs://10.10.10.10:9000/username/nni +paiConfig: + #The username to login pai + userName: username + #The password to login pai + passWord: password + #The host of restful server of pai + host: 10.10.10.10 diff --git a/tools/nnicmd/common_utils.py b/tools/nnicmd/common_utils.py index bb2a4f236c..05afed1e28 100644 --- a/tools/nnicmd/common_utils.py +++ b/tools/nnicmd/common_utils.py @@ -21,7 +21,7 @@ import json import yaml import psutil -from .constants import ERROR_INFO, NORMAL_INFO +from .constants import ERROR_INFO, NORMAL_INFO, WARNING_INFO, COLOR_RED_FORMAT, COLOR_YELLOW_FORMAT def get_yml_content(file_path): '''Load yaml file content''' @@ -43,12 +43,16 @@ def get_json_content(file_path): def print_error(content): '''Print error information to screen''' - print(ERROR_INFO % content) + print(COLOR_RED_FORMAT % (ERROR_INFO % content)) def print_normal(content): '''Print error information to screen''' print(NORMAL_INFO % content) +def print_warning(content): + '''Print warning information to screen''' + print(COLOR_YELLOW_FORMAT % (WARNING_INFO % content)) + def detect_process(pid): '''Detect if a process is alive''' try: diff --git a/tools/nnicmd/constants.py b/tools/nnicmd/constants.py index b03b1bdcbe..14467f02ed 100644 --- a/tools/nnicmd/constants.py +++ b/tools/nnicmd/constants.py @@ -34,22 +34,37 @@ STDERR_FULL_PATH = os.path.join(LOG_DIR, 'stderr') -ERROR_INFO = 'Error: %s' +ERROR_INFO = 'ERROR: %s' -NORMAL_INFO = 'Info: %s' +NORMAL_INFO = 'INFO: %s' -WARNING_INFO = 'Waining: %s' +WARNING_INFO = 'WARNING: %s' -EXPERIMENT_SUCCESS_INFO = 'Start experiment success! 
The experiment id is %s, and the restful server post is %s.\n' \ - 'You can use these commands to get more information about this experiment:\n' \ +EXPERIMENT_SUCCESS_INFO = '\033[1;32;32mSuccessfully started experiment!\n\033[0m' \ + '-----------------------------------------------------------------------\n' \ + 'The experiment id is %s\n'\ + 'The restful server post is %s\n' \ + 'The Web UI urls are: %s\n' \ + '-----------------------------------------------------------------------\n\n' \ + 'You can use these commands to get more information about the experiment\n' \ + '-----------------------------------------------------------------------\n' \ ' commands description\n' \ '1. nnictl experiment show show the information of experiments\n' \ '2. nnictl trial ls list all of trial jobs\n' \ - '3. nnictl stop stop a experiment\n' \ - '4. nnictl trial kill kill a trial job by id\n' \ - '5. nnictl --help get help information about nnictl\n' \ - '6. nnictl webui url get the url of web ui' + '3. nnictl log stderr show stderr log content\n' \ + '4. nnictl log stdout show stdout log content\n' \ + '5. nnictl stop stop a experiment\n' \ + '6. nnictl trial kill kill a trial job by id\n' \ + '7. nnictl webui url get the url of web ui\n' \ + '8. 
nnictl --help get help information about nnictl\n' \ + '-----------------------------------------------------------------------\n' \ PACKAGE_REQUIREMENTS = { 'SMAC': 'smac_tuner' } + +COLOR_RED_FORMAT = '\033[1;31;31m%s\033[0m' + +COLOR_GREEN_FORMAT = '\033[1;32;32m%s\033[0m' + +COLOR_YELLOW_FORMAT = '\033[1;33;33m%s\033[0m' \ No newline at end of file diff --git a/tools/nnicmd/launcher.py b/tools/nnicmd/launcher.py index 6570a75eee..78eb999851 100644 --- a/tools/nnicmd/launcher.py +++ b/tools/nnicmd/launcher.py @@ -30,13 +30,13 @@ from .rest_utils import rest_put, rest_post, check_rest_server, check_rest_server_quick, check_response from .url_utils import cluster_metadata_url, experiment_url from .config_utils import Config -from .common_utils import get_yml_content, get_json_content, print_error, print_normal, detect_process -from .constants import EXPERIMENT_SUCCESS_INFO, STDOUT_FULL_PATH, STDERR_FULL_PATH, LOG_DIR, REST_PORT, ERROR_INFO, NORMAL_INFO +from .common_utils import get_yml_content, get_json_content, print_error, print_normal, print_warning, detect_process +from .constants import * from .webui_utils import start_web_ui, check_web_ui def start_rest_server(port, platform, mode, experiment_id=None): '''Run nni manager process''' - print_normal('Checking experiment...') + print_normal('Checking environment...') nni_config = Config() rest_port = nni_config.get_config('restServerPort') running, _ = check_rest_server_quick(rest_port) @@ -204,10 +204,9 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No experiment_config['searchSpace'] = json.dumps('') # check rest server - print_normal('Checking restful server...') running, _ = check_rest_server(REST_PORT) if running: - print_normal('Restful server start success!') + print_normal('Successfully started Restful server!') else: print_error('Restful server start failed!') try: @@ -236,7 +235,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No if 
experiment_config['trainingServicePlatform'] == 'local': print_normal('Setting local config...') if set_local_config(experiment_config, REST_PORT): - print_normal('Success!') + print_normal('Successfully set local config!') else: print_error('Failed!') try: @@ -251,7 +250,7 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No print_normal('Setting pai config...') config_result, err_msg = set_pai_config(experiment_config, REST_PORT) if config_result: - print_normal('Success!') + print_normal('Successfully set pai config!') else: if err_msg: print_error('Failed! Error is: {}'.format(err_msg)) @@ -259,8 +258,19 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No cmds = ['pkill', '-P', str(rest_process.pid)] call(cmds) except Exception: - raise Exception(ERROR_INFO % 'Rest server stopped!') + raise Exception(ERROR_INFO % 'Restful server stopped!') exit(0) + + #start webui + if check_web_ui(): + print_warning('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!')) + print_normal('You can use \'nnictl webui stop\' to stop old Web UI process...') + else: + print_normal('Starting Web UI...') + webui_process = start_web_ui(webuiport) + if webui_process: + nni_config.set_config('webuiPid', webui_process.pid) + print_normal('Successfully started Web UI!') # start a new experiment print_normal('Starting experiment...') @@ -274,25 +284,12 @@ def launch_experiment(args, experiment_config, mode, webuiport, experiment_id=No try: cmds = ['pkill', '-P', str(rest_process.pid)] call(cmds) + cmds = ['pkill', '-P', str(webui_process.pid)] + call(cmds) except Exception: - raise Exception(ERROR_INFO % 'Rest server stopped!') + raise Exception(ERROR_INFO % 'Restful server stopped!') exit(0) - - #start webui - print_normal('Checking web ui...') - if check_web_ui(): - print_error('{0} {1}'.format(' '.join(nni_config.get_config('webuiUrl')),'is being used, please stop it first!')) - 
print_normal('You can use \'nnictl webui stop\' to stop old web ui process...') - else: - print_normal('Starting web ui...') - webui_process = start_web_ui(webuiport) - if webui_process: - nni_config.set_config('webuiPid', webui_process.pid) - print_normal('Starting web ui success!') - print_normal('{0} {1}'.format('Web UI url:', ' '.join(nni_config.get_config('webuiUrl')))) - - print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT)) - + print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, REST_PORT, ' '.join(nni_config.get_config('webuiUrl')))) def resume_experiment(args): '''resume an experiment''' diff --git a/tools/nnicmd/nnictl_utils.py b/tools/nnicmd/nnictl_utils.py index 2b7628ec67..d071741f5b 100644 --- a/tools/nnicmd/nnictl_utils.py +++ b/tools/nnicmd/nnictl_utils.py @@ -64,17 +64,20 @@ def stop_experiment(args): stop_web_ui() return running, _ = check_rest_server_quick(rest_port) + stop_rest_result = True if running: response = rest_delete(experiment_url(rest_port), 20) if not response or not check_response(response): print_error('Stop experiment failed!') + stop_rest_result = False #sleep to wait rest handler done time.sleep(3) rest_pid = nni_config.get_config('restServerPid') cmds = ['pkill', '-P', str(rest_pid)] call(cmds) stop_web_ui() - print_normal('Stop experiment success!') + if stop_rest_result: + print_normal('Stop experiment success!') def trial_ls(args): '''List trial'''