diff --git a/docs/NNICTLDOC.md b/docs/NNICTLDOC.md index 8139f5b8c4..705bbc1ef5 100644 --- a/docs/NNICTLDOC.md +++ b/docs/NNICTLDOC.md @@ -49,7 +49,8 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --experiment, -e| False| |ID of the experiment you want to resume| + | id| False| |The id of the experiment you want to resume| + | --port, -p| False| |Rest port of the experiment you want to resume| @@ -87,8 +88,8 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | + | id| False| |ID of the experiment you want to set| | --filename, -f| True| |the file storing your new search space| - | --id, -i| False| |ID of the experiment you want to set| * __nnictl update concurrency__ * Description @@ -103,8 +104,8 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | + | id| False| |ID of the experiment you want to set| | --value, -v| True| |the number of allowed concurrent trials| - | --id, -i| False| |ID of the experiment you want to set| * __nnictl update duration__ * Description @@ -119,8 +120,8 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.| - | --id, -i| False| |ID of the experiment you want to set| + | id| False| |ID of the experiment you want to set| + | --value, -v| True| |the experiment duration will be NUMBER seconds. SUFFIX may be 's' for seconds (the default), 'm' for minutes, 'h' for hours or 'd' for days.| * __nnictl trial__ @@ -137,7 +138,7 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --id, -i| False| |ID of the experiment you want to set| + | id| False| |ID of the experiment you want to set| * __nnictl trial kill__ * Description @@ -151,9 +152,8 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | + | id| False| |ID of the experiment you want to set| | --trialid, -t| True| |ID of the trial you want to kill.| - | --id, -i| False| |ID of the experiment you want to set| - @@ -171,7 +171,7 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --id, -i| False| |ID of the experiment you want to set| + | id| False| |ID of the experiment you want to set| * __nnictl experiment status__ @@ -186,17 +186,23 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --id, -i| False| |ID of the experiment you want to set| + | id| False| |ID of the experiment you want to set| * __nnictl experiment list__ * Description - Show the id and start time of all running experiments. + Show the information of all the (running) experiments. * Usage nnictl experiment list + Options: + + | Name, shorthand | Required|Default | Description | + | ------ | ------ | ------ |------ | + | all| False| False|Show all of experiments, including stopped experiments.| + * __nnictl config show__ @@ -223,10 +229,11 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | + | id| False| |ID of the experiment you want to set| | --head, -h| False| |show head lines of stdout| | --tail, -t| False| |show tail lines of stdout| | --path, -p| False| |show the path of stdout file| - | --id, -i| False| |ID of the experiment you want to set| + * __nnictl log stderr__ * Description @@ -241,10 +248,11 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | + | id| False| |ID of the experiment you want to set| | --head, -h| False| |show head lines of stderr| | --tail, -t| False| |show tail lines of stderr| | --path, -p| False| |show the path of stderr file| - | --id, -i| False| |ID of the experiment you want to set| + * __nnictl log trial__ * Description @@ -259,7 +267,7 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --id, -I| False| |the id of trial| + | id| False| |the id of trial| ### Manage webui @@ -276,4 +284,4 @@ nnictl webui | Name, shorthand | Required|Default | Description | | ------ | ------ | ------ |------ | - | --id, -i| False| |ID of the experiment you want to set| \ No newline at end of file + | id| False| |ID of the experiment you want to set| \ No newline at end of file diff --git a/tools/nnicmd/config_utils.py b/tools/nnicmd/config_utils.py index 9e1fb7ae91..17adb05fd6 100644 --- a/tools/nnicmd/config_utils.py +++ b/tools/nnicmd/config_utils.py @@ -26,8 +26,8 @@ class Config: '''a util class to load and save config''' - def __init__(self, port): - config_path = os.path.join(NNICTL_HOME_DIR, str(port)) + def __init__(self, file_path): + config_path = os.path.join(NNICTL_HOME_DIR, str(file_path)) os.makedirs(config_path, exist_ok=True) self.config_file = os.path.join(config_path, '.config') self.config = self.read_file() @@ -73,11 +73,24 @@ def __init__(self): self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment') self.experiments = self.read_file() - def add_experiment(self, id, port, time): + def add_experiment(self, id, port, time, file_name): '''set {key:value} paris to self.experiment''' - self.experiments[id] = [port, time] + self.experiments[id] = {} + self.experiments[id]['port'] = port + self.experiments[id]['startTime'] = time + self.experiments[id]['endTime'] = 'N/A' + self.experiments[id]['status'] = 'running' + self.experiments[id]['fileName'] = file_name self.write_file() + def update_experiment(self, id, key, value): + '''Update experiment''' + if id not in self.experiments: + return False + self.experiments[id][key] = value + self.write_file() + return True + def remove_experiment(self, id): '''remove an experiment by id''' if id in self.experiments: diff --git a/tools/nnicmd/constants.py b/tools/nnicmd/constants.py index 71c3d2112c..fec3b47b24 100644 --- a/tools/nnicmd/constants.py +++ b/tools/nnicmd/constants.py @@ -54,11 +54,13 @@ EXPERIMENT_START_FAILED_INFO = 'There is an experiment running in the port %d, please stop it first or set another port!\n' \ 'You could use \'nnictl stop --port [PORT]\' command to stop an experiment!\nOr you could use \'nnictl create --config [CONFIG_PATH] --port [PORT]\' to set port!\n' -EXPERIMENT_ID_INFO = '-----------------------------------------------------------------------\n' \ +EXPERIMENT_INFORMATION_FORMAT = '-----------------------------------------------------------------------\n' \ ' Experiment information\n' \ '%s\n' \ '-----------------------------------------------------------------------\n' +EXPERIMENT_DETAIL_FORMAT = 'Id: %s Status: %s StartTime: %s EndTime: %s \n' + PACKAGE_REQUIREMENTS = { 'SMAC': 'smac_tuner' } diff --git a/tools/nnicmd/launcher.py b/tools/nnicmd/launcher.py index c9da0a4518..519a82383e 100644 --- a/tools/nnicmd/launcher.py +++ b/tools/nnicmd/launcher.py @@ -34,17 +34,12 @@ from .constants import * from .webui_utils import * import time +import random +import string -def start_rest_server(port, platform, mode, experiment_id=None): +def start_rest_server(port, platform, mode, config_file_name, experiment_id=None): '''Run nni manager process''' - print_normal('Checking environment...') - nni_config = Config(port) - rest_port = nni_config.get_config('restServerPort') - running, _ = check_rest_server_quick(rest_port) - if rest_port and running: - print_error(EXPERIMENT_START_FAILED_INFO % port) - exit(1) - + nni_config = Config(config_file_name) if detect_port(port): print_error('Port %s is used by another process, please reset the port!' % port) exit(1) @@ -54,8 +49,8 @@ def start_rest_server(port, platform, mode, experiment_id=None): cmds = [manager, '--port', str(port), '--mode', platform, '--start_mode', mode] if mode == 'resume': cmds += ['--experiment_id', experiment_id] - stdout_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout') - stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + stdout_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stdout') + stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr') stdout_file = open(stdout_full_path, 'a+') stderr_file = open(stderr_full_path, 'a+') time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) @@ -66,7 +61,7 @@ def start_rest_server(port, platform, mode, experiment_id=None): process = Popen(cmds, stdout=stdout_file, stderr=stderr_file) return process, str(time_now) -def set_trial_config(experiment_config, port): +def set_trial_config(experiment_config, port, config_file_name): '''set trial configuration''' request_data = dict() value_dict = dict() @@ -89,16 +84,16 @@ def set_trial_config(experiment_config, port): return True else: print('Error message is {}'.format(response.text)) - stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr') with open(stderr_full_path, 'a+') as fout: fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':'))) return False -def set_local_config(experiment_config, port): +def set_local_config(experiment_config, port, config_file_name): '''set local configuration''' - return set_trial_config(experiment_config, port) + return set_trial_config(experiment_config, port, config_file_name) -def set_remote_config(experiment_config, port): +def set_remote_config(experiment_config, port, config_file_name): '''Call setClusterMetadata to pass trial''' #set machine_list request_data = dict() @@ -108,15 +103,15 @@ def set_remote_config(experiment_config, port): if not response or not check_response(response): if response is not None: err_message = response.text - stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr') with open(stderr_full_path, 'a+') as fout: fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':'))) return False, err_message #set trial_config - return set_trial_config(experiment_config, port), err_message + return set_trial_config(experiment_config, port, config_file_name), err_message -def set_pai_config(experiment_config, port): +def set_pai_config(experiment_config, port, config_file_name): '''set pai configuration''' pai_config_data = dict() pai_config_data['pai_config'] = experiment_config['paiConfig'] @@ -125,15 +120,15 @@ def set_pai_config(experiment_config, port): if not response or not response.status_code == 200: if response is not None: err_message = response.text - stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr') with open(stderr_full_path, 'a+') as fout: fout.write(json.dumps(json.loads(err_message), indent=4, sort_keys=True, separators=(',', ':'))) return False, err_message #set trial_config - return set_trial_config(experiment_config, port), err_message + return set_trial_config(experiment_config, port, config_file_name), err_message -def set_experiment(experiment_config, mode, port): +def set_experiment(experiment_config, mode, port, config_file_name): '''Call startExperiment (rest POST /experiment) with yaml file content''' request_data = dict() request_data['authorName'] = experiment_config['authorName'] @@ -191,17 +186,17 @@ def set_experiment(experiment_config, mode, port): if check_response(response): return response else: - stderr_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + stderr_full_path = os.path.join(NNICTL_HOME_DIR, config_file_name, 'stderr') with open(stderr_full_path, 'a+') as fout: fout.write(json.dumps(json.loads(response.text), indent=4, sort_keys=True, separators=(',', ':'))) print_error('Setting experiment error, error message is {}'.format(response.text)) return None -def launch_experiment(args, experiment_config, mode, experiment_id=None): +def launch_experiment(args, experiment_config, mode, config_file_name, experiment_id=None): '''follow steps to start rest server and start experiment''' - nni_config = Config(args.port) + nni_config = Config(config_file_name) # start rest server - rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, experiment_id) + rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], mode, config_file_name, experiment_id) nni_config.set_config('restServerPid', rest_process.pid) # Deal with annotation if experiment_config.get('useAnnotation'): @@ -236,7 +231,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None): # set remote config if experiment_config['trainingServicePlatform'] == 'remote': print_normal('Setting remote config...') - config_result, err_msg = set_remote_config(experiment_config, args.port) + config_result, err_msg = set_remote_config(experiment_config, args.port, config_file_name) if config_result: print_normal('Successfully set remote config!') else: @@ -251,7 +246,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None): # set local config if experiment_config['trainingServicePlatform'] == 'local': print_normal('Setting local config...') - if set_local_config(experiment_config, args.port): + if set_local_config(experiment_config, args.port, config_file_name): print_normal('Successfully set local config!') else: print_error('Failed!') @@ -265,7 +260,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None): #set pai config if experiment_config['trainingServicePlatform'] == 'pai': print_normal('Setting pai config...') - config_result, err_msg = set_pai_config(experiment_config, args.port) + config_result, err_msg = set_pai_config(experiment_config, args.port, config_file_name) if config_result: print_normal('Successfully set pai config!') else: @@ -280,7 +275,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None): # start a new experiment print_normal('Starting experiment...') - response = set_experiment(experiment_config, mode, args.port) + response = set_experiment(experiment_config, mode, args.port, config_file_name) if response: if experiment_id is None: experiment_id = json.loads(response.text).get('experiment_id') @@ -293,24 +288,61 @@ def launch_experiment(args, experiment_config, mode, experiment_id=None): except Exception: raise Exception(ERROR_INFO % 'Restful server stopped!') exit(1) - web_ui_url_list = get_web_ui_urls(args.port) + web_ui_url_list = get_web_ui_urls(args.port, config_file_name) #save experiment information experiment_config = Experiments() - experiment_config.add_experiment(experiment_id, args.port, start_time) + experiment_config.add_experiment(experiment_id, args.port, start_time, config_file_name) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) +def cmp_time(time1, time2): + '''compare the time''' + try: + time1 = time.strptime(time1,'%Y-%m-%d %H:%M:%S') + time2 = time.strptime(time2,'%Y-%m-%d %H:%M:%S') + return int(time1) - int(time2) + except: + return 0 + def resume_experiment(args): '''resume an experiment''' - nni_config = Config(args.port) + experiment_config = Experiments() + experiment_dict = experiment_config.get_all_experiments() + experiment_id = None + experiment_endTime = None + #find the latest stopped experiment + if not args.id: + for key in experiment_dict.keys(): + if experiment_dict[key]['status'] == 'stopped': + if experiment_id is None: + experiment_id = key + experiment_endTime = experiment_dict[key]['endTime'] + else: + if cmp_time(experiment_dict[key]['endTime'], experiment_endTime) > 0: + experiment_id = key + experiment_endTime = experiment_dict[key]['endTime'] + if experiment_id is None: + print_error('There is no experiment stopped!') + exit(1) + else: + if experiment_dict.get(args.id) is None: + print_error('Id %s not exist!' % args.id) + exit(1) + if experiment_dict[args.id]['status'] == 'running': + print_error('Experiment %s is running!' % args.id) + exit(1) + experiment_id = args.id + print_normal('Resuming experiment %s...' % experiment_id) + nni_config = Config(experiment_dict[experiment_id]['fileName']) experiment_config = nni_config.get_config('experimentConfig') experiment_id = nni_config.get_config('experimentId') - launch_experiment(args, experiment_config, 'resume', experiment_id) + launch_experiment(args, experiment_config, 'resume', experiment_dict[experiment_id]['fileName'], experiment_id) def create_experiment(args): '''start a new experiment''' - nni_config = Config(args.port) + config_file_name = ''.join(random.sample(string.ascii_letters + string.digits, 8)) + nni_config = Config(config_file_name) config_path = os.path.abspath(args.config) if not os.path.exists(config_path): print_error('Please set correct config path!') @@ -319,5 +351,5 @@ def create_experiment(args): validate_all_content(experiment_config, config_path) nni_config.set_config('experimentConfig', experiment_config) - launch_experiment(args, experiment_config, 'new') + launch_experiment(args, experiment_config, 'new', config_file_name) nni_config.set_config('restServerPort', args.port) diff --git a/tools/nnicmd/nnictl.py b/tools/nnicmd/nnictl.py index 958c6bd734..d7fd49a046 100644 --- a/tools/nnicmd/nnictl.py +++ b/tools/nnicmd/nnictl.py @@ -45,8 +45,7 @@ def parse_args(): # parse resume command parser_resume = subparsers.add_parser('resume', help='resume a new experiment') - parser_resume.add_argument('--experiment', '-e', dest='id', help='ID of the experiment you want to resume') - parser_resume.add_argument('--manager', '-m', default='nnimanager', dest='manager') + parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume') parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_resume.set_defaults(func=resume_experiment) @@ -55,15 +54,15 @@ def parse_args(): #add subparsers for parser_updater parser_updater_subparsers = parser_updater.add_subparsers() parser_updater_searchspace = parser_updater_subparsers.add_parser('searchspace', help='update searchspace') - parser_updater_searchspace.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_updater_searchspace.add_argument('id', nargs='?', help='the id of experiment') parser_updater_searchspace.add_argument('--filename', '-f', required=True) parser_updater_searchspace.set_defaults(func=update_searchspace) parser_updater_concurrency = parser_updater_subparsers.add_parser('concurrency', help='update concurrency') - parser_updater_concurrency.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_updater_concurrency.add_argument('id', nargs='?', help='the id of experiment') parser_updater_concurrency.add_argument('--value', '-v', required=True) parser_updater_concurrency.set_defaults(func=update_concurrency) parser_updater_duration = parser_updater_subparsers.add_parser('duration', help='update duration') - parser_updater_duration.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_updater_duration.add_argument('id', nargs='?', help='the id of experiment') parser_updater_duration.add_argument('--value', '-v', required=True) parser_updater_duration.set_defaults(func=update_duration) parser_updater_trialnum = parser_updater_subparsers.add_parser('trialnum', help='update maxtrialnum') @@ -81,10 +80,10 @@ def parse_args(): #add subparsers for parser_trial parser_trial_subparsers = parser_trial.add_subparsers() parser_trial_ls = parser_trial_subparsers.add_parser('ls', help='list trial jobs') - parser_trial_ls.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_trial_ls.add_argument('id', nargs='?', help='the id of experiment') parser_trial_ls.set_defaults(func=trial_ls) parser_trial_kill = parser_trial_subparsers.add_parser('kill', help='kill trial jobs') - parser_trial_kill.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_trial_kill.add_argument('id', nargs='?', help='the id of experiment') parser_trial_kill.add_argument('--trialid', '-t', required=True, dest='trialid', help='the id of trial to be killed') parser_trial_kill.set_defaults(func=trial_kill) @@ -93,13 +92,14 @@ def parse_args(): #add subparsers for parser_experiment parser_experiment_subparsers = parser_experiment.add_subparsers() parser_experiment_show = parser_experiment_subparsers.add_parser('show', help='show the information of experiment') - parser_experiment_show.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_experiment_show.add_argument('id', nargs='?', help='the id of experiment') parser_experiment_show.set_defaults(func=list_experiment) parser_experiment_status = parser_experiment_subparsers.add_parser('status', help='show the status of experiment') - parser_experiment_status.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_experiment_status.add_argument('id', nargs='?', help='the id of experiment') parser_experiment_status.set_defaults(func=experiment_status) parser_experiment_list = parser_experiment_subparsers.add_parser('list', help='list all of running experiment ids') - parser_experiment_list.set_defaults(func=experiment_id) + parser_experiment_list.add_argument('all', nargs='?', help='list all of experiments') + parser_experiment_list.set_defaults(func=experiment_list) #TODO:finish webui function #parse board command @@ -107,14 +107,14 @@ def parse_args(): #add subparsers for parser_board parser_webui_subparsers = parser_webui.add_subparsers() parser_webui_url = parser_webui_subparsers.add_parser('url', help='show the url of web ui') - parser_webui_url.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_webui_url.add_argument('id', nargs='?', help='the id of experiment') parser_webui_url.set_defaults(func=webui_url) #parse config command parser_config = subparsers.add_parser('config', help='get config information') parser_config_subparsers = parser_config.add_subparsers() parser_config_show = parser_config_subparsers.add_parser('show', help='show the information of config') - parser_config_show.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_config_show.add_argument('id', nargs='?', help='the id of experiment') parser_config_show.set_defaults(func=get_config) #parse log command @@ -122,19 +122,19 @@ def parse_args(): # add subparsers for parser_log parser_log_subparsers = parser_log.add_subparsers() parser_log_stdout = parser_log_subparsers.add_parser('stdout', help='get stdout information') - parser_log_stdout.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_log_stdout.add_argument('id', nargs='?', help='the id of experiment') parser_log_stdout.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stdout') parser_log_stdout.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stdout') parser_log_stdout.add_argument('--path', action='store_true', default=False, help='get the path of stdout file') parser_log_stdout.set_defaults(func=log_stdout) parser_log_stderr = parser_log_subparsers.add_parser('stderr', help='get stderr information') - parser_log_stderr.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_log_stderr.add_argument('id', nargs='?', help='the id of experiment') parser_log_stderr.add_argument('--tail', '-T', dest='tail', type=int, help='get tail -100 content of stderr') parser_log_stderr.add_argument('--head', '-H', dest='head', type=int, help='get head -100 content of stderr') parser_log_stderr.add_argument('--path', action='store_true', default=False, help='get the path of stderr file') parser_log_stderr.set_defaults(func=log_stderr) parser_log_trial = parser_log_subparsers.add_parser('trial', help='get trial log path') - parser_log_trial.add_argument('--id', '-i', dest='id', help='the id of experiment') + parser_log_trial.add_argument('id', nargs='?', help='the id of experiment') parser_log_trial.add_argument('--trialid', '-T', dest='trialid', help='find trial log path by id') parser_log_trial.set_defaults(func=log_trial) @@ -144,7 +144,7 @@ def parse_args(): parser_package_subparsers = parser_package.add_subparsers() parser_package_install = parser_package_subparsers.add_parser('install', help='install packages') parser_package_install.add_argument('--name', '-n', dest='name', help='package name to be installed') - parser_package_install.set_defaults(func=package_install) + parser_package_install.set_defaults(func=package_install) parser_package_show = parser_package_subparsers.add_parser('show', help='show the information of packages') parser_package_show.set_defaults(func=package_show) diff --git a/tools/nnicmd/nnictl_utils.py b/tools/nnicmd/nnictl_utils.py index 0aa31cf635..d4d99309fd 100644 --- a/tools/nnicmd/nnictl_utils.py +++ b/tools/nnicmd/nnictl_utils.py @@ -22,96 +22,87 @@ import psutil import json import datetime +import time from subprocess import call, check_output from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .config_utils import Config, Experiments from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url -from .constants import NNICTL_HOME_DIR, EXPERIMENT_ID_INFO +from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT import time -from .common_utils import print_normal, print_error, detect_process +from .common_utils import print_normal, print_error, print_warning, detect_process -def get_experiment_port(args): - '''get the port of an experiment''' +def check_experiment_id(args): + '''check if the id is valid + ''' experiment_config = Experiments() experiment_dict = experiment_config.get_all_experiments() - #1.If there is an id specified, return the corresponding port - #2.If there is no id specified, and there is an experiment running, return it as default port, or return Error - #3.If the id matches an experiment, nnictl will return the id. - #4.If the id ends with *, nnictl will match all ids matchs the regular - #5.If the id does not exist but match the prefix of an experiment id, nnictl will return the matched id - #6.If the id does not exist but match multiple prefix of the experiment ids, nnictl will give id information - #7.Users could use 'nnictl stop all' to stop all experiments if not experiment_dict: - print_normal('Experiment is not running...') - return None - if not args.id and len(experiment_dict.keys()) > 1: - print_error('There are multiple experiments running, please set the experiment id...') - experiment_information = "" - for key in experiment_dict.keys(): - experiment_information += ('Id: ' + key + ' StartTime: ' + experiment_dict[key][1] + '\n') - print(EXPERIMENT_ID_INFO % experiment_information) - return None + print_normal('There is no experiment running...') + exit(1) if not args.id: - return list(experiment_dict.values())[0][0] + running_experiment_list = [] + for key in experiment_dict.keys(): + if experiment_dict[key]['status'] == 'running': + running_experiment_list.append(key) + if len(running_experiment_list) > 1: + print_error('There are multiple experiments running, please set the experiment id...') + experiment_information = "" + for key in running_experiment_list: + experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ + experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) + exit(1) + elif not running_experiment_list: + print_error('There is no experiment running!') + exit(1) + else: + return running_experiment_list[0] if experiment_dict.get(args.id): - return experiment_dict[args.id][0] + return args.id else: - print_error('Id not correct!') - return None - -def convert_time_stamp_to_date(content): - '''Convert time stamp to date time format''' - start_time_stamp = content.get('startTime') - end_time_stamp = content.get('endTime') - if start_time_stamp: - start_time = datetime.datetime.utcfromtimestamp(start_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S") - content['startTime'] = str(start_time) - if end_time_stamp: - end_time = datetime.datetime.utcfromtimestamp(end_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S") - content['endTime'] = str(end_time) - return content - -def check_rest(args): - '''check if restful server is running''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) - rest_port = nni_config.get_config('restServerPort') - running, _ = check_rest_server_quick(rest_port) - if not running: - print_normal('Restful server is running...') - else: - print_normal('Restful server is not running...') + print_error('Id not correct!') + exit(1) def parse_ids(args): - '''Parse the arguments for nnictl stop''' + '''Parse the arguments for nnictl stop + 1.If there is an id specified, return the corresponding id + 2.If there is no id specified, and there is an experiment running, return the id, or return Error + 3.If the id matches an experiment, nnictl will return the id. + 4.If the id ends with *, nnictl will match all ids matchs the regular + 5.If the id does not exist but match the prefix of an experiment id, nnictl will return the matched id + 6.If the id does not exist but match multiple prefix of the experiment ids, nnictl will give id information + ''' experiment_config = Experiments() experiment_dict = experiment_config.get_all_experiments() if not experiment_dict: print_normal('Experiment is not running...') return None - experiment_id_list = list(experiment_dict.keys()) result_list = [] + running_experiment_list = [] + for key in experiment_dict.keys(): + if experiment_dict[key]['status'] == 'running': + running_experiment_list.append(key) if not args.id: - if len(experiment_id_list) > 1: + if len(running_experiment_list) > 1: print_error('There are multiple experiments running, please set the experiment id...') experiment_information = "" - for key in experiment_dict.keys(): - experiment_information += ('Id: ' + key + ' StartTime: ' + experiment_dict[key][1] + '\n') - print(EXPERIMENT_ID_INFO % experiment_information) - return None - result_list = experiment_id_list + for key in running_experiment_list: + experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ + experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) + exit(1) + else: + result_list = running_experiment_list elif args.id == 'all': - result_list = experiment_id_list + result_list = running_experiment_list elif args.id.endswith('*'): - for id in experiment_id_list: + for id in running_experiment_list: if id.startswith(args.id[:-1]): result_list.append(id) - elif args.id in experiment_id_list: + elif args.id in running_experiment_list: result_list.append(args.id) else: - for id in experiment_id_list: + for id in running_experiment_list: if id.startswith(args.id): result_list.append(id) if len(result_list) > 1: @@ -121,6 +112,42 @@ def parse_ids(args): print_error('There are no experiments matched, please check experiment id...') return result_list +def get_config_filename(args): + '''get the file name of config file''' + experiment_id = check_experiment_id(args) + experiment_config = Experiments() + experiment_dict = experiment_config.get_all_experiments() + return experiment_dict[experiment_id]['fileName'] + +def get_experiment_port(args): + '''get the port of experiment''' + experiment_id = check_experiment_id(args) + experiment_config = Experiments() + experiment_dict = experiment_config.get_all_experiments() + return experiment_dict[experiment_id]['port'] + +def convert_time_stamp_to_date(content): + '''Convert time stamp to date time format''' + start_time_stamp = content.get('startTime') + end_time_stamp = content.get('endTime') + if start_time_stamp: + start_time = datetime.datetime.utcfromtimestamp(start_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S") + content['startTime'] = str(start_time) + if end_time_stamp: + end_time = datetime.datetime.utcfromtimestamp(end_time_stamp // 1000).strftime("%Y/%m/%d %H:%M:%S") + content['endTime'] = str(end_time) + return content + +def check_rest(args): + '''check if restful server is running''' + nni_config = Config(get_config_filename(args)) + rest_port = nni_config.get_config('restServerPort') + running, _ = check_rest_server_quick(rest_port) + if not running: + print_normal('Restful server is running...') + else: + print_normal('Restful server is not running...') + def stop_experiment(args): '''Stop the experiment which is running''' experiment_id_list = parse_ids(args) @@ -128,15 +155,13 @@ def stop_experiment(args): experiment_config = Experiments() experiment_dict = experiment_config.get_all_experiments() for experiment_id in experiment_id_list: - port = experiment_dict.get(experiment_id)[0] - if port is None: - return None print_normal('Stoping experiment %s' % experiment_id) - nni_config = Config(port) + nni_config = Config(experiment_dict[experiment_id]['fileName']) rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if not detect_process(rest_pid): print_normal('Experiment is not running...') + experiment_config.update_experiment(experiment_id, 'status', 'stopped') return running, _ = check_rest_server_quick(rest_port) stop_rest_result = True @@ -153,15 +178,13 @@ def stop_experiment(args): call(cmds) if stop_rest_result: print_normal('Stop experiment success!') - experiment_config = Experiments() - experiment_config.remove_experiment(experiment_id) + experiment_config.update_experiment(experiment_id, 'status', 'stopped') + time_now = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) + experiment_config.update_experiment(experiment_id, 'endTime', str(time_now)) def trial_ls(args): '''List trial''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if not detect_process(rest_pid): @@ -182,10 +205,7 @@ def trial_ls(args): def trial_kill(args): '''List trial''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if not detect_process(rest_pid): @@ -203,10 +223,7 @@ def trial_kill(args): def list_experiment(args): '''Get experiment information''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if not detect_process(rest_pid): @@ -225,10 +242,7 @@ def list_experiment(args): def experiment_status(args): '''Show the status of experiment''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') result, response = check_rest_server_quick(rest_port) if not result: @@ -246,13 +260,11 @@ def get_log_content(file_name, cmds): def log_internal(args, filetype): '''internal function to call get_log_content''' - port = get_experiment_port(args) - if port is None: - return None + file_name = get_config_filename(args) if filetype == 'stdout': - file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stdout') + file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stdout') else: - file_full_path = os.path.join(NNICTL_HOME_DIR, str(port), 'stderr') + file_full_path = os.path.join(NNICTL_HOME_DIR, file_name, 'stderr') if args.head: get_log_content(file_full_path, ['head', '-' + str(args.head), file_full_path]) elif args.tail: @@ -273,10 +285,7 @@ def log_stderr(args): def log_trial(args): ''''get trial log path''' trial_id_path_dict = {} - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') rest_pid = nni_config.get_config('restServerPid') if not detect_process(rest_pid): @@ -304,28 +313,33 @@ def log_trial(args): def get_config(args): '''get config info''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) print(nni_config.get_all_config()) def webui_url(args): '''show the url of web ui''' - port = get_experiment_port(args) - if port is None: - return None - nni_config = Config(port) + nni_config = Config(get_config_filename(args)) print_normal('{0} {1}'.format('Web UI url:', ' '.join(nni_config.get_config('webuiUrl')))) -def experiment_id(args): - '''get the id of all experiments''' +def experiment_list(args): + '''get the information of all experiments''' experiment_config = Experiments() experiment_dict = experiment_config.get_all_experiments() if not experiment_dict: print('There is no experiment running...') + exit(1) + experiment_id_list = [] + if args.all and args.all == 'all': + for key in experiment_dict.keys(): + experiment_id_list.append(key) else: - experiment_information = "" for key in experiment_dict.keys(): - experiment_information += ('Id: ' + key + ' StartTime: ' + experiment_dict[key][1] + '\n') - print(EXPERIMENT_ID_INFO % experiment_information) \ No newline at end of file + if experiment_dict[key]['status'] == 'running': + experiment_id_list.append(key) + if not experiment_id_list: + print_warning('There is no experiment running...\nYou can use \'nnictl experiment list all\' to list all stopped experiments!') + experiment_information = "" + for key in experiment_id_list: + experiment_information += (EXPERIMENT_DETAIL_FORMAT % (key, experiment_dict[key]['status'], \ + experiment_dict[key]['startTime'], experiment_dict[key]['endTime'])) + print(EXPERIMENT_INFORMATION_FORMAT % experiment_information) diff --git a/tools/nnicmd/updater.py b/tools/nnicmd/updater.py index 751f81cf1a..798e7632d6 100644 --- a/tools/nnicmd/updater.py +++ b/tools/nnicmd/updater.py @@ -25,7 +25,7 @@ from .url_utils import experiment_url from .config_utils import Config from .common_utils import get_json_content -from .nnictl_utils import get_experiment_port +from .nnictl_utils import check_experiment_id, get_experiment_port, get_config_filename def validate_digit(value, start, end): '''validate if a digit is valid''' @@ -57,7 +57,7 @@ def get_query_type(key): def update_experiment_profile(args, key, value): '''call restful server to update experiment profile''' - nni_config = Config(args.port) + nni_config = Config(get_config_filename(args)) rest_port = nni_config.get_config('restServerPort') running, _ = check_rest_server_quick(rest_port) if running: @@ -102,9 +102,7 @@ def update_duration(args): def update_trialnum(args): validate_digit(args.value, 1, 999999999) - args.port = get_experiment_port(args) - if args.port is not None: - if update_experiment_profile(args, 'maxTrialNum', int(args.value)): - print('INFO: update %s success!' % 'trialnum') - else: - print('ERROR: update %s failed!' % 'trialnum') \ No newline at end of file + if update_experiment_profile(args, 'maxTrialNum', int(args.value)): + print('INFO: update %s success!' % 'trialnum') + else: + print('ERROR: update %s failed!' % 'trialnum') \ No newline at end of file diff --git a/tools/nnicmd/webui_utils.py b/tools/nnicmd/webui_utils.py index 89a5c2cf9d..69c374aebd 100644 --- a/tools/nnicmd/webui_utils.py +++ b/tools/nnicmd/webui_utils.py @@ -22,12 +22,12 @@ from socket import AddressFamily from .config_utils import Config -def get_web_ui_urls(port): +def get_web_ui_urls(port, CONFIG_FILE_NAME): webui_url_list = [] for name, info in psutil.net_if_addrs().items(): for addr in info: if AddressFamily.AF_INET == addr.family: webui_url_list.append('http://{}:{}'.format(addr.address, port)) - nni_config = Config(port) + nni_config = Config(CONFIG_FILE_NAME) nni_config.set_config('webuiUrl', webui_url_list) return webui_url_list