From 8c93890bd39ed05d7109619da57dc4675b0fd28a Mon Sep 17 00:00:00 2001 From: QuanluZhang Date: Fri, 26 Oct 2018 09:28:40 +0800 Subject: [PATCH] fix bug about execDuration and endTime (#270) * fix bug about execDuration and endTime * modify time interval to 30 seconds * refactor based on Gems's suggestion * for triggering ci --- src/nni_manager/core/nnimanager.ts | 31 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/nni_manager/core/nnimanager.ts b/src/nni_manager/core/nnimanager.ts index 19f5afe08e..796236c670 100644 --- a/src/nni_manager/core/nnimanager.ts +++ b/src/nni_manager/core/nnimanager.ts @@ -48,7 +48,7 @@ import { createDispatcherInterface, IpcInterface } from './ipcInterface'; class NNIManager implements Manager { private trainingService: TrainingService; private dispatcher: IpcInterface | undefined; - private currSubmittedTrialNum: number; // need to be recovered + private currSubmittedTrialNum: number; // need to be recovered private trialConcurrencyChange: number; // >0: increase, <0: decrease private customizedTrials: string[]; // need to be recovered private log: Logger; @@ -58,7 +58,6 @@ class NNIManager implements Manager { private status: NNIManagerStatus; private waitingTrials: string[]; private trialJobs: Map; - private suspendDuration: number; constructor() { this.currSubmittedTrialNum = 0; @@ -69,7 +68,6 @@ class NNIManager implements Manager { this.dispatcherPid = 0; this.waitingTrials = []; this.trialJobs = new Map(); - this.suspendDuration = 0; this.log = getLogger(); this.dataStore = component.get(DataStore); @@ -336,12 +334,16 @@ class NNIManager implements Manager { } private async periodicallyUpdateExecDuration(): Promise { - const startTime: number = Date.now(); - const execDuration: number = this.experimentProfile.execDuration; + let count: number = 1; for (; ;) { - await delay(1000 * 60 * 10); // 10 minutes - this.experimentProfile.execDuration = execDuration + (Date.now() - startTime) / 1000 - this.suspendDuration; - await this.storeExperimentProfile(); + await delay(1000 * 1); // 1 seconds + if (this.status.status === 'EXPERIMENT_RUNNING') { + this.experimentProfile.execDuration += 1; + if (count % 10 === 0) { + await this.storeExperimentProfile(); + } + } + count += 1; } } @@ -351,7 +353,6 @@ class NNIManager implements Manager { for (const trialJobId of Array.from(this.trialJobs.keys())) { const trialJobDetail: TrialJobDetail = await this.trainingService.getTrialJob(trialJobId); const oldTrialJobDetail: TrialJobDetail | undefined = this.trialJobs.get(trialJobId); - //assert(oldTrialJobDetail); if (oldTrialJobDetail !== undefined && oldTrialJobDetail.status !== trialJobDetail.status) { this.trialJobs.set(trialJobId, Object.assign({}, trialJobDetail)); await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, undefined, trialJobDetail.url); @@ -388,8 +389,6 @@ class NNIManager implements Manager { throw new Error('Error: tuner has not been setup'); } let allFinishedTrialJobNum: number = 0; - const startTime: number = Date.now(); - let suspendStartTime: number = 0; for (; ;) { if (this.status.status === 'STOPPING') { break; @@ -426,18 +425,18 @@ class NNIManager implements Manager { } // check maxtrialnum and maxduration here - if ((Date.now() - startTime) / 1000 + this.experimentProfile.execDuration - this.suspendDuration - > this.experimentProfile.params.maxExecDuration || + if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration || this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) { assert(this.status.status === 'EXPERIMENT_RUNNING' || this.status.status === 'DONE'); if (this.status.status === 'EXPERIMENT_RUNNING') { - suspendStartTime = Date.now(); + this.experimentProfile.endTime = Date.now(); + await this.storeExperimentProfile(); } this.status.status = 'DONE'; } else { if (this.status.status === 'DONE') { - assert(suspendStartTime !== 0); - this.suspendDuration += (Date.now() - suspendStartTime) / 1000; + delete this.experimentProfile.endTime; + await this.storeExperimentProfile(); } this.status.status = 'EXPERIMENT_RUNNING'; for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) {