Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
fix bug about execDuration and endTime (#270)
Browse files Browse the repository at this point in the history
* fix bug about execDuration and endTime

* modify time interval to 30 seconds

* refactor based on Gems's suggestion

* for triggering ci
  • Loading branch information
QuanluZhang authored Oct 26, 2018
1 parent 60ad940 commit 8c93890
Showing 1 changed file with 15 additions and 16 deletions.
31 changes: 15 additions & 16 deletions src/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import { createDispatcherInterface, IpcInterface } from './ipcInterface';
class NNIManager implements Manager {
private trainingService: TrainingService;
private dispatcher: IpcInterface | undefined;
private currSubmittedTrialNum: number; // need to be recovered
private currSubmittedTrialNum: number; // need to be recovered
private trialConcurrencyChange: number; // >0: increase, <0: decrease
private customizedTrials: string[]; // need to be recovered
private log: Logger;
Expand All @@ -58,7 +58,6 @@ class NNIManager implements Manager {
private status: NNIManagerStatus;
private waitingTrials: string[];
private trialJobs: Map<string, TrialJobDetail>;
private suspendDuration: number;

constructor() {
this.currSubmittedTrialNum = 0;
Expand All @@ -69,7 +68,6 @@ class NNIManager implements Manager {
this.dispatcherPid = 0;
this.waitingTrials = [];
this.trialJobs = new Map<string, TrialJobDetail>();
this.suspendDuration = 0;

this.log = getLogger();
this.dataStore = component.get(DataStore);
Expand Down Expand Up @@ -336,12 +334,16 @@ class NNIManager implements Manager {
}

private async periodicallyUpdateExecDuration(): Promise<void> {
const startTime: number = Date.now();
const execDuration: number = this.experimentProfile.execDuration;
let count: number = 1;
for (; ;) {
await delay(1000 * 60 * 10); // 10 minutes
this.experimentProfile.execDuration = execDuration + (Date.now() - startTime) / 1000 - this.suspendDuration;
await this.storeExperimentProfile();
await delay(1000 * 1); // 1 seconds
if (this.status.status === 'EXPERIMENT_RUNNING') {
this.experimentProfile.execDuration += 1;
if (count % 10 === 0) {
await this.storeExperimentProfile();
}
}
count += 1;
}
}

Expand All @@ -351,7 +353,6 @@ class NNIManager implements Manager {
for (const trialJobId of Array.from(this.trialJobs.keys())) {
const trialJobDetail: TrialJobDetail = await this.trainingService.getTrialJob(trialJobId);
const oldTrialJobDetail: TrialJobDetail | undefined = this.trialJobs.get(trialJobId);
//assert(oldTrialJobDetail);
if (oldTrialJobDetail !== undefined && oldTrialJobDetail.status !== trialJobDetail.status) {
this.trialJobs.set(trialJobId, Object.assign({}, trialJobDetail));
await this.dataStore.storeTrialJobEvent(trialJobDetail.status, trialJobDetail.id, undefined, trialJobDetail.url);
Expand Down Expand Up @@ -388,8 +389,6 @@ class NNIManager implements Manager {
throw new Error('Error: tuner has not been setup');
}
let allFinishedTrialJobNum: number = 0;
const startTime: number = Date.now();
let suspendStartTime: number = 0;
for (; ;) {
if (this.status.status === 'STOPPING') {
break;
Expand Down Expand Up @@ -426,18 +425,18 @@ class NNIManager implements Manager {
}

// check maxtrialnum and maxduration here
if ((Date.now() - startTime) / 1000 + this.experimentProfile.execDuration - this.suspendDuration
> this.experimentProfile.params.maxExecDuration ||
if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration ||
this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) {
assert(this.status.status === 'EXPERIMENT_RUNNING' || this.status.status === 'DONE');
if (this.status.status === 'EXPERIMENT_RUNNING') {
suspendStartTime = Date.now();
this.experimentProfile.endTime = Date.now();
await this.storeExperimentProfile();
}
this.status.status = 'DONE';
} else {
if (this.status.status === 'DONE') {
assert(suspendStartTime !== 0);
this.suspendDuration += (Date.now() - suspendStartTime) / 1000;
delete this.experimentProfile.endTime;
await this.storeExperimentProfile();
}
this.status.status = 'EXPERIMENT_RUNNING';
for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) {
Expand Down

0 comments on commit 8c93890

Please sign in to comment.