Skip to content

Commit

Permalink
Merge pull request #11 from demianzhang/dev-local
Browse files Browse the repository at this point in the history
Refactor and Fix
  • Loading branch information
demianzhang authored Apr 5, 2019
2 parents e13afbb + ee2bc29 commit f763344
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 118 deletions.
17 changes: 13 additions & 4 deletions src/nni_manager/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ async function getVersion(): Promise<string> {
/**
* run command as ChildProcess
*/
function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newEnv: any):ChildProcess{
function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newEnv: any): ChildProcess{
let cmd: string = command;
let arg: string[] = [];
let newShell: boolean = true;
Expand All @@ -406,7 +406,7 @@ function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newE
/**
* judge whether the process is alive
*/
async function isAlive(pid:any):Promise<boolean>{
async function isAlive(pid:any): Promise<boolean>{
let deferred : Deferred<boolean> = new Deferred<boolean>();
let alive: boolean = false;
if(process.platform ==='win32'){
Expand All @@ -430,7 +430,7 @@ async function isAlive(pid:any):Promise<boolean>{
/**
* kill process
*/
async function killPid(pid:any):Promise<void>{
async function killPid(pid:any): Promise<void>{
let deferred : Deferred<void> = new Deferred<void>();
try {
if (process.platform === "win32") {
Expand All @@ -446,6 +446,15 @@ async function killPid(pid:any):Promise<void>{
return deferred.promise;
}

/**
 * Pick the line terminator for the current platform.
 * @returns "\r\n" when process.platform is "win32", "\n" otherwise
 */
function getNewLine(): string {
    return process.platform === "win32" ? "\r\n" : "\n";
}

export {countFilesRecursively, getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getCheckpointDir,
getLogDir, getExperimentRootDir, getJobCancelStatus, getDefaultDatabaseDir, getIPV4Address,
mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid };
mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid, getNewLine };
35 changes: 31 additions & 4 deletions src/nni_manager/training_service/common/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,28 @@ export async function execMkdir(directory: string): Promise<void> {
return Promise.resolve();
}

/**
 * Create a new file by running a platform-specific shell command.
 *
 * On win32 the command is PowerShell's `New-Item ... -ItemType "file" -Force`;
 * on every other platform it is `touch`.
 *
 * NOTE(review): `filename` is interpolated into the command line without any
 * quoting, so a path containing spaces or shell metacharacters will presumably
 * not be handled correctly - confirm what callers pass in.
 * @param filename path of the file to create
 */
export async function execNewFile(filename: string): Promise<void> {
    const command: string = process.platform === 'win32'
        ? `powershell.exe New-Item -Path ${filename} -ItemType "file" -Force`
        : `touch ${filename}`;
    await cpp.exec(command);
}

/**
* run script
* @param filePath
*/
export function execScript(filePath: string): void {
export function execScript(filePath: string): cp.ChildProcess {
if (process.platform === 'win32') {
cp.exec(`powershell.exe -file ${filePath}`);
return cp.exec(`powershell.exe -file ${filePath}`);
} else {
cp.exec(`bash ${filePath}`);
return cp.exec(`bash ${filePath}`);
}
}

Expand Down Expand Up @@ -113,13 +126,27 @@ export async function execRemove(directory: string): Promise<void>{
*/
export async function execKill(pid: string): Promise<void>{
if (process.platform === 'win32') {
await cpp.exec(`powershell.exe kill ${pid}`);
await cpp.exec(`cmd /c taskkill /PID ${pid} /T`);
} else {
await cpp.exec(`pkill -P ${pid}`);
}
return Promise.resolve();
}

/**
 * Build the script statement that sets a single environment variable.
 *
 * On win32 the PowerShell form `$env:KEY="VALUE"` is returned; on every other
 * platform the `export KEY=VALUE` form is returned. Key and value are inserted
 * verbatim, with no escaping applied.
 * @param variable key/value pair describing the environment variable
 * @returns command string
 */
export function setEnvironmentVariable(variable: { key: string; value: string }): string {
    const { key, value } = variable;
    if (process.platform !== 'win32') {
        return `export ${key}=${value}`;
    }
    return `$env:${key}="${value}"`;
}


/**
* generate script file name
Expand Down
62 changes: 28 additions & 34 deletions src/nni_manager/training_service/local/localTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ import {
HostJobApplicationForm, JobApplicationForm, HyperParameters, TrainingService, TrialJobApplicationForm,
TrialJobDetail, TrialJobMetric, TrialJobStatus
} from '../../common/trainingService';
import { delay, generateParamFileName, getExperimentRootDir, uniqueString, getJobCancelStatus, isAlive } from '../../common/utils';
import { delay, generateParamFileName, getExperimentRootDir, uniqueString, getJobCancelStatus, isAlive, getNewLine } from '../../common/utils';
import { execMkdir, getScriptName, execScript, setEnvironmentVariable, execNewFile } from '../common/util'

const tkill = require('tree-kill');

Expand Down Expand Up @@ -352,43 +353,21 @@ class LocalTrainingService implements TrainingService {
}
}

private async runScript(localTrailConfig: TrialConfig, workingDirectory: string, variables: { key: string; value: string }[]):Promise<string[]>{
let cmdParameter: string[] = [];
const runScriptLines: string[] = [];
private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[]{
let script: string[] = [];
if (process.platform === "win32") {
runScriptLines.push(`cd ${localTrailConfig.codeDir}`);
for (const variable of variables) {
runScriptLines.push(`$env:${variable.key}="${variable.value}"`);
}
runScriptLines.push(
script.push(
`cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
`$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
`$NOW_DATE = "$NOW_DATE" + "000"`,
`Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`
);
await cpp.exec(`powershell.exe mkdir ${workingDirectory}`);
await cpp.exec(`powershell.exe mkdir ${path.join(workingDirectory, '.nni')}`);
await cpp.exec(`powershell.exe New-Item ${path.join(workingDirectory, '.nni', 'metrics')}`);
await fs.promises.writeFile(path.join(workingDirectory, 'run.ps1'), runScriptLines.join('\r\n'), { encoding: 'utf8', mode: 0o777 });
cmdParameter.push("powershell");
cmdParameter.push("run.ps1");
`Write $LASTEXITCODE " " $NOW_DATE | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`);
}
else{
runScriptLines.push('#!/bin/bash', `cd ${localTrailConfig.codeDir}`);
for (const variable of variables) {
runScriptLines.push(`export ${variable.key}=${variable.value}`);
}
runScriptLines.push(
`eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
script.push(
`eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
`echo $? \`date +%s000\` >${path.join(workingDirectory, '.nni', 'state')}`);
await cpp.exec(`mkdir -p ${workingDirectory}`);
await cpp.exec(`mkdir -p ${path.join(workingDirectory, '.nni')}`);
await cpp.exec(`touch ${path.join(workingDirectory, '.nni', 'metrics')}`);
await fs.promises.writeFile(path.join(workingDirectory, 'run.sh'), runScriptLines.join('\n'), { encoding: 'utf8', mode: 0o777 });
cmdParameter.push("bash");
cmdParameter.push("run.sh");
}
return Promise.resolve(cmdParameter);
return script;
}

private async runTrialJob(trialJobId: string, resource: {}): Promise<void> {
Expand All @@ -398,13 +377,28 @@ class LocalTrainingService implements TrainingService {
if (!this.localTrailConfig) {
throw new Error('trial config is not initialized');
}
let cmd: string[] = await this.runScript(this.localTrailConfig, trialJobDetail.workingDirectory, variables);
const runScriptLines: string[] = [];
if (process.platform !== "win32"){
runScriptLines.push('#!/bin/bash');
}
runScriptLines.push(`cd ${this.localTrailConfig.codeDir}`);
for (const variable of variables) {
runScriptLines.push(setEnvironmentVariable(variable));
}
const scripts: string[] = this.getScript(this.localTrailConfig, trialJobDetail.workingDirectory);
scripts.forEach(script => {
runScriptLines.push(script);
});
await execMkdir(trialJobDetail.workingDirectory);
await execMkdir(path.join(trialJobDetail.workingDirectory, '.nni'));
await execNewFile(path.join(trialJobDetail.workingDirectory, '.nni', 'metrics'));
const scriptName: string = getScriptName('run');
await fs.promises.writeFile(path.join(trialJobDetail.workingDirectory, scriptName), runScriptLines.join(getNewLine()), { encoding: 'utf8', mode: 0o777 });
await this.writeParameterFile(trialJobDetail.workingDirectory, (<TrialJobApplicationForm>trialJobDetail.form).hyperParameters);
const process: cp.ChildProcess = cp.exec(`${cmd[0]} ${path.join(trialJobDetail.workingDirectory, cmd[1])}`);

const trialJobProcess: cp.ChildProcess = execScript(path.join(trialJobDetail.workingDirectory, scriptName));
this.setTrialJobStatus(trialJobDetail, 'RUNNING');
trialJobDetail.startTime = Date.now();
trialJobDetail.pid = process.pid;
trialJobDetail.pid = trialJobProcess.pid;
this.setExtraProperties(trialJobDetail, resource);

let buffer: Buffer = Buffer.alloc(0);
Expand Down
47 changes: 0 additions & 47 deletions test/it_local.ps1

This file was deleted.

27 changes: 0 additions & 27 deletions test/unittest.ps1

This file was deleted.

7 changes: 5 additions & 2 deletions tools/nni_gpu_tool/gpu_metrics_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
from xml.dom import minidom

def check_ready_to_run():
#TODO check process in windows
if sys.platform == 'win32':
return True
pgrep_output = subprocess.check_output('wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\'" get processid')
pidList = pgrep_output.decode("utf-8").strip().split()
pidList.remove('ProcessId')
pidList = list(map(int, pidList))
return len(pidList) == 1
pgrep_output =subprocess.check_output('pgrep -fx \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
pidList = []
for pid in pgrep_output.splitlines():
Expand Down

0 comments on commit f763344

Please sign in to comment.