Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Refactor NNI manager globals (step 1) - argparse #4510

Merged
merged 5 commits into from
Mar 20, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions nni/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def __init__(self, config=None, training_service=None):
self.id: str = management.generate_experiment_id()
self.port: Optional[int] = None
self._proc: Optional[Popen] = None
self.mode = 'new'
self.action = 'create'
self.url_prefix: Optional[str] = None

args = [config, training_service] # deal with overloading
Expand Down Expand Up @@ -127,7 +127,7 @@ def start(self, port: int = 8080, debug: bool = False, run_mode: RunMode = RunMo
log_dir = Path.home() / f'nni-experiments/{self.id}/log'
nni.runtime.log.start_experiment_log(self.id, log_dir, debug)

self._proc = launcher.start_experiment(self.mode, self.id, config, port, debug, run_mode, self.url_prefix)
self._proc = launcher.start_experiment(self.action, self.id, config, port, debug, run_mode, self.url_prefix)
assert self._proc is not None

self.port = port # port will be None if start up failed
Expand Down Expand Up @@ -261,15 +261,15 @@ def view(experiment_id: str, port: int = 8080, non_blocking: bool = False):
def _resume(exp_id, exp_dir=None):
exp = Experiment()
exp.id = exp_id
exp.mode = 'resume'
exp.action = 'resume'
exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
return exp

@staticmethod
def _view(exp_id, exp_dir=None):
exp = Experiment()
exp.id = exp_id
exp.mode = 'view'
exp.action = 'view'
exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
return exp

Expand Down
34 changes: 12 additions & 22 deletions nni/experiment/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@
class NniManagerArgs:
port: int
experiment_id: int
start_mode: str # new or resume
action: str # new or resume
mode: str # training service platform
log_dir: str
experiments_directory: str
log_level: str
readonly: bool = False
foreground: bool = False
url_prefix: Optional[str] = None
dispatcher_pipe: Optional[str] = None

def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix):
self.port = port
self.experiment_id = exp_id
self.action = action
self.foreground = foreground
self.url_prefix = url_prefix
self.log_dir = config.experiment_working_directory
self.experiments_directory = config.experiment_working_directory

if isinstance(config.training_service, list):
self.mode = 'hybrid'
Expand All @@ -55,20 +55,12 @@ def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix):
if debug and self.log_level not in ['debug', 'trace']:
self.log_level = 'debug'

if action == 'resume':
self.start_mode = 'resume'
elif action == 'view':
self.start_mode = 'resume'
self.readonly = True
else:
self.start_mode = 'new'

def to_command_line_args(self):
ret = []
for field in fields(self):
value = getattr(self, field.name)
if value is not None:
ret.append('--' + field.name)
ret.append('--' + field.name.replace('_', '-'))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this update?

if isinstance(value, bool):
ret.append(str(value).lower())
else:
Expand All @@ -77,6 +69,8 @@ def to_command_line_args(self):

def start_experiment(action, exp_id, config, port, debug, run_mode, url_prefix):
foreground = run_mode.value == 'foreground'
if url_prefix is not None:
url_prefix = url_prefix.strip('/')
nni_manager_args = NniManagerArgs(action, exp_id, config, port, debug, foreground, url_prefix)

_ensure_port_idle(port)
Expand Down Expand Up @@ -126,7 +120,7 @@ def _start_rest_server(nni_manager_args, run_mode) -> Tuple[int, Popen]:
cmd += nni_manager_args.to_command_line_args()

if run_mode.value == 'detach':
log = Path(nni_manager_args.log_dir, nni_manager_args.experiment_id, 'log')
log = Path(nni_manager_args.experiments_directory, nni_manager_args.experiment_id, 'log')
out = (log / 'nnictl_stdout.log').open('a')
err = (log / 'nnictl_stderr.log').open('a')
header = f'Experiment {nni_manager_args.experiment_id} start: {datetime.now()}'
Expand Down Expand Up @@ -192,7 +186,7 @@ def _ensure_port_idle(port: int, message: Optional[str] = None) -> None:


def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool, experiment_id: str,
pipe_path: str = None, mode: str = 'new') -> Tuple[int, Popen]:
pipe_path: str) -> Tuple[int, Popen]:
if isinstance(config.training_service, list):
ts = 'hybrid'
else:
Expand All @@ -204,23 +198,19 @@ def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool
'port': port,
'mode': ts,
'experiment_id': experiment_id,
'start_mode': mode,
'log_dir': config.experiment_working_directory,
'action': 'create',
'experiments_directory': config.experiment_working_directory,
'log_level': 'debug' if debug else 'info'
}
if pipe_path is not None:
args['dispatcher_pipe'] = pipe_path

if mode == 'view':
args['start_mode'] = 'resume'
args['readonly'] = 'true'

node_dir = Path(nni_node.__path__[0])
node = str(node_dir / ('node.exe' if sys.platform == 'win32' else 'node'))
main_js = str(node_dir / 'main.js')
cmd = [node, '--max-old-space-size=4096', main_js]
for arg_key, arg_value in args.items():
cmd.append('--' + arg_key)
cmd.append('--' + arg_key.replace('_', '-'))
cmd.append(str(arg_value))

if sys.platform == 'win32':
Expand Down
16 changes: 6 additions & 10 deletions nni/tools/nnictl/legacy_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,17 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log
node_command = os.path.join(entry_dir, 'node')
cmds = [node_command, '--max-old-space-size=4096', entry_file, '--port', str(port), '--mode', platform, \
'--experiment_id', experiment_id]
if mode == 'view':
cmds += ['--start_mode', 'resume']
cmds += ['--readonly', 'true']
else:
cmds += ['--start_mode', mode]
cmds += ['--action', mode]
if log_dir is not None:
cmds += ['--log_dir', log_dir]
cmds += ['--experiments-directory', log_dir]
if log_level is not None:
cmds += ['--log_level', log_level]
cmds += ['--log-level', log_level]
if foreground:
cmds += ['--foreground', 'true']
if url_prefix:
_validate_prefix_path(url_prefix)
set_prefix_url(url_prefix)
cmds += ['--url_prefix', url_prefix]
cmds += ['--url-prefix', url_prefix.strip('/')]

stdout_full_path, stderr_full_path = get_log_path(experiment_id)
with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file:
Expand Down Expand Up @@ -520,9 +516,9 @@ def create_experiment(args):

try:
if schema == 1:
launch_experiment(args, config_v1, 'new', experiment_id, 1)
launch_experiment(args, config_v1, 'create', experiment_id, 1)
else:
launch_experiment(args, config_v2, 'new', experiment_id, 2)
launch_experiment(args, config_v2, 'create', experiment_id, 2)
except Exception as exception:
restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
if restServerPid:
Expand Down
91 changes: 91 additions & 0 deletions ts/nni_manager/common/globals/arguments.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

/**
* Parse NNI manager's command line arguments.
**/

import assert from 'assert/strict';

import yargs from 'yargs/yargs';

/**
* Command line arguments provided by "nni/experiment/launcher.py".
*
* Hyphen-separated words are automatically converted to camelCases by yargs lib, but snake_cases are not.
* So it supports "--log-level" but does not support "--log_level".
**/
export interface NniManagerArgs {
readonly port: number;
readonly experimentId: string;
readonly action: 'create' | 'resume' | 'view';
readonly experimentsDirectory: string; // must be absolute
readonly logLevel: 'critical' | 'error' | 'warning' | 'info' | 'debug';
readonly foreground: boolean;
readonly urlPrefix: string; // leading and trailing slashes must be stripped

// these are planned to be removed
readonly mode: string;
readonly dispatcherPipe: string | undefined;
}

export function parseArgs(rawArgs: string[]): NniManagerArgs {
const parser = yargs(rawArgs).options(yargsOptions).strict().fail((_msg, err, _yargs) => { throw err; });
const parsedArgs: NniManagerArgs = parser.parseSync();

// strip yargs leftovers
const argsAsAny: any = {};
for (const key in yargsOptions) {
argsAsAny[key] = (parsedArgs as any)[key];
assert(!Number.isNaN(argsAsAny[key]), `Command line arg --${key} is not a number`);
}
if (argsAsAny.dispatcherPipe === '') {
argsAsAny.dispatcherPipe = undefined;
}
const args: NniManagerArgs = argsAsAny;

const prefixErrMsg = `Command line arg --url-prefix "${args.urlPrefix}" is not stripped`;
assert(!args.urlPrefix.startsWith('/') && !args.urlPrefix.endsWith('/'), prefixErrMsg);

return args;
}

const yargsOptions = {
port: {
demandOption: true,
type: 'number'
},
experimentId: {
demandOption: true,
type: 'string'
},
action: {
choices: [ 'create', 'resume', 'view' ] as const,
demandOption: true
},
experimentsDirectory: {
demandOption: true,
type: 'string'
},
logLevel: {
choices: [ 'critical', 'error', 'warning', 'info', 'debug' ] as const,
demandOption: true
},
foreground: {
default: false,
type: 'boolean'
},
urlPrefix: {
default: '',
type: 'string'
},

mode: {
default: '',
type: 'string'
},
dispatcherPipe: {
default: '',
type: 'string'
}
} as const;
92 changes: 18 additions & 74 deletions ts/nni_manager/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,11 @@ import { SqlDB } from './core/sqlDatabase';
import { NNIExperimentsManager } from './core/nniExperimentsManager';
import { NNITensorboardManager } from './core/nniTensorboardManager';
import { RestServer } from './rest_server';
import { parseArgs } from 'common/globals/arguments';

function initStartupInfo(
startExpMode: string, experimentId: string, basePort: number, platform: string,
logDirectory: string, experimentLogLevel: string, readonly: boolean, dispatcherPipe: string, urlprefix: string): void {
const createNew: boolean = (startExpMode === ExperimentStartUpMode.NEW);
setExperimentStartupInfo(createNew, experimentId, basePort, platform, logDirectory, experimentLogLevel, readonly, dispatcherPipe, urlprefix);
}
const args = parseArgs(process.argv.slice(2));

async function initContainer(foreground: boolean, _platformMode: string, logFileName?: string): Promise<void> {
async function initContainer(): Promise<void> {
Container.bind(Manager)
.to(NNIManager)
.scope(Scope.Singleton);
Expand All @@ -45,84 +41,32 @@ async function initContainer(foreground: boolean, _platformMode: string, logFile
.to(NNITensorboardManager)
.scope(Scope.Singleton);
const DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
if (!foreground) {
if (logFileName === undefined) {
startLogging(DEFAULT_LOGFILE);
} else {
startLogging(logFileName);
}
if (!args.foreground) {
startLogging(DEFAULT_LOGFILE);
}
// eslint-disable-next-line @typescript-eslint/no-use-before-define
setLogLevel(logLevel);
setLogLevel(args.logLevel);
const ds: DataStore = component.get(DataStore);

await ds.init();
}

function usage(): void {
console.info('usage: node main.js --port <port> --mode \
<local/remote/pai/kubeflow/frameworkcontroller/aml/adl/hybrid/dlc> --start_mode <new/resume> --experiment_id <id> --foreground <true/false>');
}

const strPort: string = parseArg(['--port', '-p']);
if (!strPort || strPort.length === 0) {
usage();
process.exit(1);
}

const foregroundArg: string = parseArg(['--foreground', '-f']);
if (foregroundArg && !['true', 'false'].includes(foregroundArg.toLowerCase())) {
console.log(`FATAL: foreground property should only be true or false`);
usage();
process.exit(1);
}
const foreground: boolean = (foregroundArg && foregroundArg.toLowerCase() === 'true') ? true : false;

const port: number = parseInt(strPort, 10);

const mode: string = parseArg(['--mode', '-m']);

const startMode: string = parseArg(['--start_mode', '-s']);
if (![ExperimentStartUpMode.NEW, ExperimentStartUpMode.RESUME].includes(startMode)) {
console.log(`FATAL: unknown start_mode: ${startMode}`);
usage();
process.exit(1);
}

const experimentId: string = parseArg(['--experiment_id', '-id']);
if (experimentId.trim().length < 1) {
console.log(`FATAL: cannot resume the experiment, invalid experiment_id: ${experimentId}`);
usage();
process.exit(1);
}

const logDir: string = parseArg(['--log_dir', '-ld']);
if (logDir.length > 0) {
if (!fs.existsSync(logDir)) {
console.log(`FATAL: log_dir ${logDir} does not exist`);
}
}

const logLevel: string = parseArg(['--log_level', '-ll']);

const readonlyArg: string = parseArg(['--readonly', '-r']);
if (readonlyArg && !['true', 'false'].includes(readonlyArg.toLowerCase())) {
console.log(`FATAL: readonly property should only be true or false`);
usage();
process.exit(1);
}
const readonly = (readonlyArg && readonlyArg.toLowerCase() == 'true') ? true : false;

const dispatcherPipe: string = parseArg(['--dispatcher_pipe']);

const urlPrefix: string = parseArg(['--url_prefix']);

initStartupInfo(startMode, experimentId, port, mode, logDir, logLevel, readonly, dispatcherPipe, urlPrefix);
setExperimentStartupInfo(
args.action === 'create',
args.experimentId,
args.port,
args.mode,
args.experimentsDirectory,
args.logLevel,
args.action === 'view',
args.dispatcherPipe ?? '',
args.urlPrefix
);

mkDirP(getLogDir())
.then(async () => {
try {
await initContainer(foreground, mode);
await initContainer();
const restServer: RestServer = component.get(RestServer);
await restServer.start();
} catch (err) {
Expand Down
Loading