Merge pull request #109 from Microsoft/master
merge master
SparkSnail authored Jan 3, 2019
2 parents e661c55 + 50697a2 commit 4e5d836
Showing 14 changed files with 1,379 additions and 392 deletions.
87 changes: 49 additions & 38 deletions docs/ExperimentConfig.md
@@ -5,6 +5,7 @@ The config file is written in yaml format and needs to be written correctly.
This document describes the rules for writing the config file, and provides some examples and templates.
## Template
* __light weight (without Annotation and Assessor)__

```
authorName:
experimentName:
@@ -34,7 +35,9 @@ machineList:
username:
passwd:
```

* __Use Assessor__

```
authorName:
experimentName:
@@ -71,7 +74,9 @@ machineList:
username:
passwd:
```

* __Use Annotation__

```
authorName:
experimentName:
@@ -107,6 +112,7 @@ machineList:
username:
passwd:
```

## Configuration
* __authorName__
* Description
@@ -123,9 +129,9 @@ machineList:
* __trialConcurrency__
* Description

__trialConcurrency__ specifies the maximum number of trial jobs that run simultaneously.

Note: if trialGpuNum is bigger than the number of free gpus, and the number of trial jobs running simultaneously cannot reach trialConcurrency, some trial jobs will be put into a queue to wait for gpu allocation.
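For instance, a minimal sketch (the values are illustrative, not defaults) that keeps at most four trials running at once, each requesting one gpu:

```
trialConcurrency: 4
trial:
  command: python3 mnist.py
  codeDir: /nni/mnist
  gpuNum: 1
```

On a machine with fewer than four free gpus, the surplus trials simply wait in the queue described in the note above.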

* __maxExecDuration__
* Description
@@ -155,22 +161,22 @@ machineList:
* __searchSpacePath__
* Description

__searchSpacePath__ specifies the path of the search space file, which should be a valid path on the local linux machine.

Note: if useAnnotation=True is set, the searchSpacePath field should be removed.
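For reference, the search space file is a json document that maps parameter names to sampling strategies; a minimal sketch (the parameter names are illustrative):

```
{
  "learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01]},
  "dropout_rate": {"_type": "uniform", "_value": [0.1, 0.5]}
}
```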
* __useAnnotation__
* Description

__useAnnotation__ enables using annotation to analyze the trial code and generate the search space.

Note: if useAnnotation=True is set, the searchSpacePath field should be removed.

* __nniManagerIp__
* Description

__nniManagerIp__ sets the IP address of the machine on which the nni manager process runs. This field is optional, and if it is not set, the eth0 device IP will be used instead.

Note: run ifconfig on the NNI manager's machine to check whether the eth0 device exists. If it does not, we recommend setting nniManagerIp explicitly.


* __tuner__
@@ -181,76 +187,76 @@ machineList:
* __builtinTunerName__

__builtinTunerName__ specifies the name of a builtin tuner; the nni sdk provides the following builtin tuners: {__TPE__, __Random__, __Anneal__, __Evolution__, __BatchTuner__, __GridSearch__}.
* __classArgs__

__classArgs__ specifies the arguments of the tuner algorithm. If __builtinTunerName__ is in {__TPE__, __Random__, __Anneal__, __Evolution__}, users should set __optimize_mode__.
* __codeDir__, __classFileName__, __className__ and __classArgs__
* __codeDir__

__codeDir__ specifies the directory of the tuner code.
* __classFileName__

__classFileName__ specifies the name of the tuner file.
* __className__

__className__ specifies the name of the tuner class.
* __classArgs__

__classArgs__ specifies the arguments of the tuner algorithm.
* __gpuNum__

__gpuNum__ specifies the number of gpus used to run the tuner process. The value of this field should be a positive number.

Note: users can specify only one way to set the tuner, for example, set {tunerName, optimizationMode} or {tunerCommand, tunerCwd}; they cannot set both.
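To make the two mutually exclusive styles concrete, here is a sketch of each; the values are illustrative, and the file and class names in the second block are hypothetical placeholders:

```
tuner:
  builtinTunerName: TPE
  classArgs:
    optimize_mode: maximize
```

or, with a user-defined tuner:

```
tuner:
  codeDir: /nni/tuner
  classFileName: mytuner.py
  className: MyTuner
  classArgs:
    optimize_mode: maximize
  gpuNum: 1
```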

* __assessor__

* Description

__assessor__ specifies the assessor algorithm to run an experiment. There are two ways to set the assessor. One way is to use an assessor provided by the nni sdk, in which case users need to set __builtinAssessorName__ and __classArgs__. The other way is to use users' own assessor file, in which case they need to set __codeDir__, __classFileName__, __className__ and __classArgs__.
* __builtinAssessorName__ and __classArgs__
* __builtinAssessorName__

__builtinAssessorName__ specifies the name of a builtin assessor; the nni sdk provides one kind of assessor, {__Medianstop__}.
* __classArgs__

__classArgs__ specifies the arguments of the assessor algorithm.
* __codeDir__, __classFileName__, __className__ and __classArgs__
* __codeDir__

__codeDir__ specifies the directory of the assessor code.
* __classFileName__

__classFileName__ specifies the name of the assessor file.
* __className__

__className__ specifies the name of the assessor class.
* __classArgs__

__classArgs__ specifies the arguments of the assessor algorithm.
* __gpuNum__

__gpuNum__ specifies the number of gpus used to run the assessor process. The value of this field should be a positive number.

Note: users can specify only one way to set the assessor, for example, set {assessorName, optimizationMode} or {assessorCommand, assessorCwd}; they cannot set both. If users do not want to use an assessor, the assessor field should be left empty.
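As a sketch, a builtin assessor configuration would look like the following (the classArgs value is illustrative):

```
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
```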
* __trial(local, remote)__
* __command__

__command__ specifies the command to run the trial process.
* __codeDir__

__codeDir__ specifies the directory of your own trial file.
* __gpuNum__

__gpuNum__ specifies the number of gpus used to run the trial process. The default value is 0.

* __trial(pai)__
* __command__

__command__ specifies the command to run the trial process.
* __codeDir__

__codeDir__ specifies the directory of your own trial file.
* __gpuNum__

__gpuNum__ specifies the number of gpus used to run the trial process. The default value is 0.
@@ -306,7 +312,7 @@ machineList:

* __image__

__image__ sets the image to be used in __ps__.

* __worker__

@@ -333,7 +339,7 @@ machineList:

* __image__

__image__ sets the image to be used in __worker__.



@@ -347,7 +353,7 @@ machineList:

__port__ is the ssh port used to connect to the machine.

Note: if users leave the port empty, the default value will be 22.
* __username__

__username__ is the account on the remote machine.
@@ -359,7 +365,7 @@ machineList:

If users use an ssh key to log in to the remote machine, they can set __sshKeyPath__ in the config file. __sshKeyPath__ is the path of the ssh key file, which should be valid.

Note: if users set passwd and sshKeyPath simultaneously, nni will try passwd.
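For example, a machineList entry that logs in with an ssh key rather than a password might look like the following (the address and paths are illustrative):

```
machineList:
  - ip: 10.10.10.10
    port: 22
    username: test
    sshKeyPath: /home/test/.ssh/id_rsa
```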
* __passphrase__

@@ -426,6 +432,7 @@ machineList:
* __local mode__

If users want to run trial jobs on the local machine and use annotation to generate the search space, they can use the following config:

```
authorName: test
experimentName: test_experiment
@@ -450,6 +457,7 @@ trial:
```

An assessor configuration can be added to the config file if an assessor is used:

```
authorName: test
experimentName: test_experiment
@@ -482,6 +490,7 @@ trial:
```

Or users can specify their own tuner and assessor files as follows:

```
authorName: test
experimentName: test_experiment
@@ -518,6 +527,7 @@ trial:
* __remote mode__

To run trial jobs on remote machines, users can specify the remote machine information in the following format:

```
authorName: test
experimentName: test_experiment
@@ -596,7 +606,6 @@ paiConfig:
passWord: test
#The host of restful server of pai
host: 10.10.10.10
```

* __kubeflow mode__
@@ -635,7 +644,9 @@ kubeflowConfig:
server: 10.10.10.10
path: /var/nfs/general
```

kubeflow using azure storage:

```
authorName: default
experimentName: example_mni
```
6 changes: 6 additions & 0 deletions examples/tuners/enas_nni/README.md
@@ -0,0 +1,6 @@
**Run ENAS in NNI**
===

Now we have an ENAS example, [enas-nni](https://github.com/countif/enas_nni), which runs in nni, from our contributors.
Thanks to our lovely contributors.
And welcome more and more people to join us!
2 changes: 1 addition & 1 deletion src/nni_manager/common/manager.ts
@@ -85,7 +85,7 @@ interface TrialJobStatistics {
}

interface NNIManagerStatus {
status: 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL';
status: 'INITIALIZED' | 'RUNNING' | 'ERROR' | 'STOPPING' | 'STOPPED' | 'DONE' | 'NO_MORE_TRIAL' | 'TUNER_NO_MORE_TRIAL';
errors: string[];
}

32 changes: 18 additions & 14 deletions src/nni_manager/core/nnimanager.ts
@@ -425,14 +425,10 @@ class NNIManager implements Manager {
throw new Error('Error: tuner has not been setup');
}
let allFinishedTrialJobNum: number = 0;
let waitSubmittedToFinish: number;
while (this.status.status !== 'STOPPING' && this.status.status !== 'STOPPED') {
const finishedTrialJobNum: number = await this.requestTrialJobsStatus();

allFinishedTrialJobNum += finishedTrialJobNum;
if (allFinishedTrialJobNum >= this.experimentProfile.params.maxTrialNum) {
// write this log for travis CI
this.log.info('Experiment done.');
}

// requestTrialNum is the number of trials that will be requested from tuner.
// If trialConcurrency does not change, requestTrialNum equals finishedTrialJobNum.
@@ -467,21 +463,29 @@
// as still running. DONE could be transfered from RUNNING or NO_MORE_TRIAL.
assert(this.status.status === 'RUNNING' ||
this.status.status === 'DONE' ||
this.status.status === 'NO_MORE_TRIAL');
this.status.status === 'NO_MORE_TRIAL' ||
this.status.status === 'TUNER_NO_MORE_TRIAL');
if (this.experimentProfile.execDuration > this.experimentProfile.params.maxExecDuration ||
this.currSubmittedTrialNum >= this.experimentProfile.params.maxTrialNum) {
if (this.status.status === 'RUNNING' ||
this.status.status === 'NO_MORE_TRIAL') {
this.experimentProfile.endTime = Date.now();
await this.storeExperimentProfile();
if (this.status.status !== 'DONE') {
this.status.status = 'NO_MORE_TRIAL';
waitSubmittedToFinish = this.currSubmittedTrialNum;

assert(allFinishedTrialJobNum <= waitSubmittedToFinish);
if (allFinishedTrialJobNum >= waitSubmittedToFinish) {
this.status.status = 'DONE';
this.experimentProfile.endTime = Date.now();
await this.storeExperimentProfile();
// write this log for travis CI
this.log.info('Experiment done.');
}
}
this.status.status = 'DONE';
} else {
if (this.status.status === 'DONE') {
delete this.experimentProfile.endTime;
await this.storeExperimentProfile();
}
if (this.status.status !== 'NO_MORE_TRIAL') {
if (this.status.status !== 'TUNER_NO_MORE_TRIAL') {
this.status.status = 'RUNNING';
}
for (let i: number = this.trialJobs.size; i < this.experimentProfile.params.trialConcurrency; i++) {
@@ -602,7 +606,7 @@ class NNIManager implements Manager {
this.requestTrialJobs(this.experimentProfile.params.trialConcurrency);
break;
case NEW_TRIAL_JOB:
if (this.status.status === 'NO_MORE_TRIAL') {
if (this.status.status === 'TUNER_NO_MORE_TRIAL') {
this.log.warning('It is not supposed to receive more trials after NO_MORE_TRIAL is set');
this.status.status = 'RUNNING';
}
@@ -625,7 +629,7 @@ class NNIManager implements Manager {
'ADD_HYPERPARAMETER', tunerCommand.trial_job_id, content, undefined);
break;
case NO_MORE_TRIAL_JOBS:
this.status.status = 'NO_MORE_TRIAL';
this.status.status = 'TUNER_NO_MORE_TRIAL';
break;
case KILL_TRIAL_JOB:
await this.trainingService.cancelTrialJob(JSON.parse(content), true);
15 changes: 15 additions & 0 deletions src/nni_manager/core/test/import_all.test.ts
@@ -0,0 +1,15 @@
import * as glob from 'glob';

// Istanbul only generates reports for used/imported files; files that are not used/imported
// by test cases are not included in code coverage reports.
// This is a workaround to import all files in order to show all source files in code coverage reports.

glob.sync('**/*.ts').forEach((file) => {
    // Skip dependencies, type definitions, test files, and a top-level main.ts
    // (for a root-level 'main.ts', indexOf returns 0, which is falsy).
    if (file.indexOf('node_modules/') < 0 && file.indexOf('types/') < 0
        && file.indexOf('.test.ts') < 0 && file.indexOf('main.ts')) {
        try {
            import('../../' + file);
        } catch (err) {
            // Ignore files that fail to import.
        }
    }
});