Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Refactor NNI manager globals (step 1) - argparse #4510

Merged
merged 5 commits into from
Mar 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions nni/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(self, config=None, training_service=None):
self.id: str = management.generate_experiment_id()
self.port: Optional[int] = None
self._proc: Optional[Popen] = None
self.mode = 'new'
self.action = 'create'
self.url_prefix: Optional[str] = None

args = [config, training_service] # deal with overloading
Expand Down Expand Up @@ -127,7 +127,7 @@ def start(self, port: int = 8080, debug: bool = False, run_mode: RunMode = RunMo
log_dir = Path.home() / f'nni-experiments/{self.id}/log'
nni.runtime.log.start_experiment_log(self.id, log_dir, debug)

self._proc = launcher.start_experiment(self.mode, self.id, config, port, debug, run_mode, self.url_prefix)
self._proc = launcher.start_experiment(self.action, self.id, config, port, debug, run_mode, self.url_prefix)
assert self._proc is not None

self.port = port # port will be None if start up failed
Expand Down Expand Up @@ -261,15 +261,15 @@ def view(experiment_id: str, port: int = 8080, non_blocking: bool = False):
def _resume(exp_id, exp_dir=None):
exp = Experiment()
exp.id = exp_id
exp.mode = 'resume'
exp.action = 'resume'
exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
return exp

@staticmethod
def _view(exp_id, exp_dir=None):
exp = Experiment()
exp.id = exp_id
exp.mode = 'view'
exp.action = 'view'
exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
return exp

Expand Down
44 changes: 20 additions & 24 deletions nni/experiment/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,27 @@

@dataclass(init=False)
class NniManagerArgs:
# argv sent to "ts/nni_manager/main.js"

port: int
experiment_id: int
start_mode: str # new or resume
mode: str # training service platform
log_dir: str
action: str # 'new', 'resume', 'view'
mode: str # training service platform, to be removed
experiments_directory: str # renamed "config.nni_experiments_directory", must be absolute
log_level: str
readonly: bool = False
foreground: bool = False
url_prefix: Optional[str] = None
url_prefix: Optional[str] = None # leading and trailing "/" must be stripped
dispatcher_pipe: Optional[str] = None

def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix):
self.port = port
self.experiment_id = exp_id
self.action = action
self.foreground = foreground
self.url_prefix = url_prefix
self.log_dir = config.experiment_working_directory
# config field name "experiment_working_directory" is a mistake
# see "ts/nni_manager/common/globals/arguments.ts" for details
self.experiments_directory = config.experiment_working_directory

if isinstance(config.training_service, list):
self.mode = 'hybrid'
Expand All @@ -54,20 +58,14 @@ def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix):
if debug and self.log_level not in ['debug', 'trace']:
self.log_level = 'debug'

if action == 'resume':
self.start_mode = 'resume'
elif action == 'view':
self.start_mode = 'resume'
self.readonly = True
else:
self.start_mode = 'new'

def to_command_line_args(self):
# reformat fields to meet yargs library's format
# see "ts/nni_manager/common/globals/arguments.ts" for details
ret = []
for field in fields(self):
value = getattr(self, field.name)
if value is not None:
ret.append('--' + field.name)
ret.append('--' + field.name.replace('_', '-'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this update?

if isinstance(value, bool):
ret.append(str(value).lower())
else:
Expand All @@ -76,6 +74,8 @@ def to_command_line_args(self):

def start_experiment(action, exp_id, config, port, debug, run_mode, url_prefix):
foreground = run_mode.value == 'foreground'
if url_prefix is not None:
url_prefix = url_prefix.strip('/')
nni_manager_args = NniManagerArgs(action, exp_id, config, port, debug, foreground, url_prefix)

_ensure_port_idle(port)
Expand Down Expand Up @@ -135,7 +135,7 @@ def _start_rest_server(nni_manager_args, run_mode) -> Tuple[int, Popen]:
cmd += nni_manager_args.to_command_line_args()

if run_mode.value == 'detach':
log = Path(nni_manager_args.log_dir, nni_manager_args.experiment_id, 'log')
log = Path(nni_manager_args.experiments_directory, nni_manager_args.experiment_id, 'log')
out = (log / 'nnictl_stdout.log').open('a')
err = (log / 'nnictl_stderr.log').open('a')
header = f'Experiment {nni_manager_args.experiment_id} start: {datetime.now()}'
Expand Down Expand Up @@ -201,7 +201,7 @@ def _ensure_port_idle(port: int, message: Optional[str] = None) -> None:


def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool, experiment_id: str,
pipe_path: str = None, mode: str = 'new') -> Tuple[int, Popen]:
pipe_path: str, mode: str = 'create') -> Tuple[int, Popen]:
if isinstance(config.training_service, list):
ts = 'hybrid'
else:
Expand All @@ -213,24 +213,20 @@ def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool
'port': port,
'mode': ts,
'experiment_id': experiment_id,
'start_mode': mode,
'log_dir': config.experiment_working_directory,
'action': mode,
'experiments_directory': config.experiment_working_directory,
'log_level': 'debug' if debug else 'info'
}
if pipe_path is not None:
args['dispatcher_pipe'] = pipe_path

if mode == 'view':
args['start_mode'] = 'resume'
args['readonly'] = 'true'

import nni_node
node_dir = Path(nni_node.__path__[0])
node = str(node_dir / ('node.exe' if sys.platform == 'win32' else 'node'))
main_js = str(node_dir / 'main.js')
cmd = [node, '--max-old-space-size=4096', main_js]
for arg_key, arg_value in args.items():
cmd.append('--' + arg_key)
cmd.append('--' + arg_key.replace('_', '-'))
cmd.append(str(arg_value))

if sys.platform == 'win32':
Expand Down
16 changes: 6 additions & 10 deletions nni/tools/nnictl/legacy_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,17 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log
node_command = os.path.join(entry_dir, 'node')
cmds = [node_command, '--max-old-space-size=4096', entry_file, '--port', str(port), '--mode', platform, \
'--experiment_id', experiment_id]
if mode == 'view':
cmds += ['--start_mode', 'resume']
cmds += ['--readonly', 'true']
else:
cmds += ['--start_mode', mode]
cmds += ['--action', mode]
if log_dir is not None:
cmds += ['--log_dir', log_dir]
cmds += ['--experiments-directory', log_dir]
if log_level is not None:
cmds += ['--log_level', log_level]
cmds += ['--log-level', log_level]
if foreground:
cmds += ['--foreground', 'true']
if url_prefix:
_validate_prefix_path(url_prefix)
set_prefix_url(url_prefix)
cmds += ['--url_prefix', url_prefix]
cmds += ['--url-prefix', url_prefix.strip('/')]

stdout_full_path, stderr_full_path = get_log_path(experiment_id)
with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file:
Expand Down Expand Up @@ -520,9 +516,9 @@ def create_experiment(args):

try:
if schema == 1:
launch_experiment(args, config_v1, 'new', experiment_id, 1)
launch_experiment(args, config_v1, 'create', experiment_id, 1)
else:
launch_experiment(args, config_v2, 'new', experiment_id, 2)
launch_experiment(args, config_v2, 'create', experiment_id, 2)
except Exception as exception:
restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
if restServerPid:
Expand Down
96 changes: 96 additions & 0 deletions ts/nni_manager/common/globals/arguments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

/**
* Parse NNI manager's command line arguments.
**/

import assert from 'assert/strict';

import yargs from 'yargs/yargs';

/**
* Command line arguments provided by "nni/experiment/launcher.py".
*
* Hyphen-separated words are automatically converted to camelCases by yargs lib, but snake_cases are not.
* So it supports "--log-level" but does not support "--log_level".
*
* Unfortunately I misunderstood "experiment_working_directory" config field when deciding the name.
* It defaults to "~/nni-experiments" rather than "~/nni-experiments/<experiment-id>",
* and further more the working directory is "site-packages/nni_node", not either.
* For compatibility concern we cannot change the public API, so there is an inconsistency here.
**/
export interface NniManagerArgs {
readonly port: number;
readonly experimentId: string;
readonly action: 'create' | 'resume' | 'view';
readonly experimentsDirectory: string; // renamed "config.experiment_working_directory", must be absolute
readonly logLevel: 'critical' | 'error' | 'warning' | 'info' | 'debug';
readonly foreground: boolean;
readonly urlPrefix: string; // leading and trailing "/" must be stripped

// these are planned to be removed
readonly mode: string;
readonly dispatcherPipe: string | undefined;
}

export function parseArgs(rawArgs: string[]): NniManagerArgs {
const parser = yargs(rawArgs).options(yargsOptions).strict().fail((_msg, err, _yargs) => { throw err; });
const parsedArgs: NniManagerArgs = parser.parseSync();

// strip yargs leftovers
const argsAsAny: any = {};
for (const key in yargsOptions) {
argsAsAny[key] = (parsedArgs as any)[key];
assert(!Number.isNaN(argsAsAny[key]), `Command line arg --${key} is not a number`);
}
if (argsAsAny.dispatcherPipe === '') {
argsAsAny.dispatcherPipe = undefined;
}
const args: NniManagerArgs = argsAsAny;

const prefixErrMsg = `Command line arg --url-prefix "${args.urlPrefix}" is not stripped`;
assert(!args.urlPrefix.startsWith('/') && !args.urlPrefix.endsWith('/'), prefixErrMsg);

return args;
}

const yargsOptions = {
port: {
demandOption: true,
type: 'number'
},
experimentId: {
demandOption: true,
type: 'string'
},
action: {
choices: [ 'create', 'resume', 'view' ] as const,
demandOption: true
},
experimentsDirectory: {
demandOption: true,
type: 'string'
},
logLevel: {
choices: [ 'critical', 'error', 'warning', 'info', 'debug' ] as const,
demandOption: true
},
foreground: {
default: false,
type: 'boolean'
},
urlPrefix: {
default: '',
type: 'string'
},

mode: {
default: '',
type: 'string'
},
dispatcherPipe: {
default: '',
type: 'string'
}
} as const;
Loading