From fab183c7c9ce265fe69fb00256648a8f45218584 Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Mon, 29 Jul 2019 16:31:08 +0800
Subject: [PATCH 1/7] Handle the case where nvidia-smi does not exist

---
 tools/nni_gpu_tool/gpu_metrics_collector.py | 22 ++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py
index 56095a0362..1da8ccb3ff 100644
--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -21,6 +21,7 @@
 import subprocess
 import sys
 import time
+import traceback
 
 from xml.dom import minidom
 
@@ -33,7 +34,7 @@ def check_ready_to_run():
         pidList.remove(os.getpid())
         return len(pidList) == 0
     else:
-        pgrep_output =subprocess.check_output('pgrep -fx \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
+        pgrep_output = subprocess.check_output('pgrep -fx \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
         pidList = []
         for pid in pgrep_output.splitlines():
             pidList.append(int(pid))
@@ -48,11 +49,13 @@ def main(argv):
     with open(os.path.join(metrics_output_dir, "gpu_metrics"), "w") as outputFile:
         pass
     os.chmod(os.path.join(metrics_output_dir, "gpu_metrics"), 0o777)
-    cmd = 'nvidia-smi -q -x'
+    cmd = 'nvidia-smi -q -x'.split()
     while(True):
         try:
-            smi_output = subprocess.check_output(cmd, shell=True)
+            smi_output = subprocess.check_output(cmd)
             parse_nvidia_smi_result(smi_output, metrics_output_dir)
+        except FileNotFoundError:
+            gen_empty_gpu_metric(smi_output)
         except:
             exception = sys.exc_info()
             for e in exception:
@@ -86,6 +89,19 @@ def parse_nvidia_smi_result(smi, outputDir):
         e_info = sys.exc_info()
         print('xmldoc paring error')
 
+def gen_empty_gpu_metric(outputDir):
+    try:
+        with open(os.path.join(outputDir, "gpu_metrics"), 'a') as outputFile:
+            outPut = {}
+            outPut["Timestamp"] = time.asctime(time.localtime())
+            outPut["gpuCount"] = 0
+            outPut["gpuInfos"] = []
+            print(outPut)
+            outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True)))
+            outputFile.flush()
+    except Exception:
+        traceback.print_exc()
+
 
 if __name__ == "__main__":
     main(sys.argv[1:])

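For context on the hunk above: splitting the command into an argv list and dropping shell=True is what makes a missing nvidia-smi binary observable as FileNotFoundError; with shell=True a POSIX shell would report "command not found" and exit 127, so check_output would raise CalledProcessError instead. A minimal standalone sketch of that distinction, with illustrative variable handling that is not part of the patch:

    import subprocess

    cmd = 'nvidia-smi -q -x'.split()
    try:
        # Without shell=True, a missing executable surfaces as FileNotFoundError,
        # which the collector can treat as "no NVIDIA driver on this machine".
        smi_output = subprocess.check_output(cmd)
    except FileNotFoundError:
        smi_output = None   # fall back to writing an empty metrics record
    except subprocess.CalledProcessError as err:
        # nvidia-smi exists but exited non-zero (driver or GPU error)
        print('nvidia-smi failed with exit code', err.returncode)
        smi_output = None
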
From 507434961fb15d3dcb1f696d1de752e4d7fb7245 Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Tue, 6 Aug 2019 13:04:18 +0800
Subject: [PATCH 2/7] Bugfix: pass the output directory to gen_empty_gpu_metric

---
 tools/nni_gpu_tool/gpu_metrics_collector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py
index 1da8ccb3ff..7847a52a5c 100644
--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -55,7 +55,7 @@ def main(argv):
             smi_output = subprocess.check_output(cmd)
             parse_nvidia_smi_result(smi_output, metrics_output_dir)
         except FileNotFoundError:
-            gen_empty_gpu_metric(smi_output)
+            gen_empty_gpu_metric(metrics_output_dir)
         except:
             exception = sys.exc_info()
             for e in exception:

From 69239b4bc33ed2e401e30b9ab9fe4298cd1e3c0c Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Thu, 8 Aug 2019 16:11:14 +0800
Subject: [PATCH 3/7] Notify the user when no GPU is found

---
 Makefile                                      |  2 +-
 examples/trials/mnist/config.yml              |  2 +-
 .../training_service/local/gpuScheduler.ts    |  3 ++
 .../local/localTrainingService.ts             | 42 +++++++++----------
 .../remoteMachineTrainingService.ts           |  6 ++-
 tools/nni_gpu_tool/gpu_metrics_collector.py   |  1 +
 6 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index 9217c83dba..8877e5c2ae 100644
--- a/Makefile
+++ b/Makefile
@@ -54,7 +54,7 @@ NNI_NODE_FOLDER = $(NNI_DEPENDENCY_FOLDER)/nni-node-$(OS_SPEC)-x64
 NNI_NODE ?= $(BIN_FOLDER)/node
 NNI_YARN_TARBALL ?= $(NNI_DEPENDENCY_FOLDER)/nni-yarn.tar.gz
 NNI_YARN_FOLDER ?= $(NNI_DEPENDENCY_FOLDER)/nni-yarn
-NNI_YARN := PATH=$(BIN_FOLDER):$${PATH} $(NNI_YARN_FOLDER)/bin/yarn
+NNI_YARN ?= PATH=$(BIN_FOLDER):$${PATH} $(NNI_YARN_FOLDER)/bin/yarn
 
 ## Version number
 NNI_VERSION_VALUE = $(shell git describe --tags)
diff --git a/examples/trials/mnist/config.yml b/examples/trials/mnist/config.yml
index 794ca1cef6..231c661ec3 100644
--- a/examples/trials/mnist/config.yml
+++ b/examples/trials/mnist/config.yml
@@ -18,4 +18,4 @@ tuner:
 trial:
   command: python3 mnist.py
   codeDir: .
-  gpuNum: 0
+  gpuNum: 1
diff --git a/src/nni_manager/training_service/local/gpuScheduler.ts b/src/nni_manager/training_service/local/gpuScheduler.ts
index 933235a222..03e8d91afa 100644
--- a/src/nni_manager/training_service/local/gpuScheduler.ts
+++ b/src/nni_manager/training_service/local/gpuScheduler.ts
@@ -54,6 +54,9 @@ class GPUScheduler {
             } catch (error) {
                 this.log.error('Read GPU summary failed with error: ', error);
             }
+            if (this.gpuSummary.gpuCount === 0) {
+                throw new Error('GPU not available. Please check your CUDA configuration');
+            }
             await delay(5000);
         }
     }
diff --git a/src/nni_manager/training_service/local/localTrainingService.ts b/src/nni_manager/training_service/local/localTrainingService.ts
index 1fb2cb9327..c47be9ec32 100644
--- a/src/nni_manager/training_service/local/localTrainingService.ts
+++ b/src/nni_manager/training_service/local/localTrainingService.ts
@@ -131,7 +131,7 @@ class LocalTrainingService implements TrainingService {
     private readonly occupiedGpuIndexNumMap: Map<number, number>;
     private designatedGpuIndices!: Set<number>;
     private readonly log: Logger;
-    private localTrailConfig?: TrialConfig;
+    private localTrialConfig?: TrialConfig;
     private localConfig?: LocalConfig;
     private isMultiPhase: boolean;
     private readonly jobStreamMap: Map<string, ts.Stream>;
@@ -204,7 +204,7 @@ class LocalTrainingService implements TrainingService {
                 } catch (error) {
                     //ignore
                 }
-                this.log.debug(`trailJob status update: ${trialJobId}, ${trialJob.status}`);
+                this.log.debug(`trialJob status update: ${trialJobId}, ${trialJob.status}`);
             }
         }
 
@@ -302,14 +302,14 @@ class LocalTrainingService implements TrainingService {
         }
         switch (key) {
             case TrialConfigMetadataKey.TRIAL_CONFIG:
-                this.localTrailConfig = <TrialConfig>JSON.parse(value);
+                this.localTrialConfig = <TrialConfig>JSON.parse(value);
                 // Parse trial config failed, throw Error
-                if (this.localTrailConfig === undefined) {
+                if (this.localTrialConfig === undefined) {
                     throw new Error('trial config parsed failed');
                 }
-                if (this.localTrailConfig.gpuNum !== undefined) {
-                    this.log.info(`required GPU number is ${this.localTrailConfig.gpuNum}`);
-                    if (this.gpuScheduler === undefined && this.localTrailConfig.gpuNum > 0) {
+                if (this.localTrialConfig.gpuNum !== undefined) {
+                    this.log.info(`required GPU number is ${this.localTrialConfig.gpuNum}`);
+                    if (this.gpuScheduler === undefined && this.localTrialConfig.gpuNum > 0) {
                         this.gpuScheduler = new GPUScheduler();
                     }
                 }
@@ -343,10 +343,10 @@ class LocalTrainingService implements TrainingService {
         switch (key) {
             case TrialConfigMetadataKey.TRIAL_CONFIG:
                 let getResult: Promise<string>;
-                if (this.localTrailConfig === undefined) {
+                if (this.localTrialConfig === undefined) {
                     getResult = Promise.reject(new NNIError(NNIErrorNames.NOT_FOUND, `${key} is never set yet`));
                 } else {
-                    getResult = Promise.resolve(JSON.stringify(this.localTrailConfig));
+                    getResult = Promise.resolve(JSON.stringify(this.localTrialConfig));
                 }
 
                 return getResult;
@@ -427,8 +427,8 @@ class LocalTrainingService implements TrainingService {
     }
 
     private tryGetAvailableResource(): [boolean, { gpuIndices: number[]}] {
-        if (this.localTrailConfig === undefined) {
-            throw new Error('localTrailConfig is not initialized!');
+        if (this.localTrialConfig === undefined) {
+            throw new Error('localTrialConfig is not initialized!');
         }
 
         const resource: { gpuIndices: number[] } = { gpuIndices: [] };
@@ -450,11 +450,11 @@ class LocalTrainingService implements TrainingService {
             selectedGPUIndices = selectedGPUIndices.filter((index: number) => this.designatedGpuIndices.has(index));
         }
 
-        if (selectedGPUIndices.length < this.localTrailConfig.gpuNum) {
+        if (selectedGPUIndices.length < this.localTrialConfig.gpuNum) {
             return [false, resource];
         }
 
-        selectedGPUIndices.splice(this.localTrailConfig.gpuNum);
+        selectedGPUIndices.splice(this.localTrialConfig.gpuNum);
         Object.assign(resource, { gpuIndices: selectedGPUIndices });
 
         return [true, resource];
@@ -512,17 +512,17 @@ class LocalTrainingService implements TrainingService {
         }
     }
 
-    private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[] {
+    private getScript(localTrialConfig: TrialConfig, workingDirectory: string): string[] {
         const script: string[] = [];
         if (process.platform === 'win32') {
             script.push(
-                `cmd /c ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
+                `cmd /c ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
                 `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
                 `$NOW_DATE = "$NOW_DATE" + (Get-Date -Format fff).ToString()`,
                 `Write $LASTEXITCODE " " $NOW_DATE  | Out-File ${path.join(workingDirectory, '.nni', 'state')} -NoNewline -encoding utf8`);
         } else {
             script.push(
-                `eval ${localTrailConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
+                `eval ${localTrialConfig.command} 2>${path.join(workingDirectory, 'stderr')}`,
                 `echo $? \`date +%s%3N\` >${path.join(workingDirectory, '.nni', 'state')}`);
         }
 
@@ -531,23 +531,23 @@ class LocalTrainingService implements TrainingService {
 
     private async runTrialJob(trialJobId: string, resource: {gpuIndices: number[]}): Promise<void> {
         const trialJobDetail: LocalTrialJobDetail = <LocalTrialJobDetail>this.jobMap.get(trialJobId);
-        if (this.localTrailConfig === undefined) {
+        if (this.localTrialConfig === undefined) {
             throw new Error(`localTrialConfig not initialized!`);
         }
-        const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrailConfig.gpuNum);
+        const variables: { key: string; value: string }[] = this.getEnvironmentVariables(trialJobDetail, resource, this.localTrialConfig.gpuNum);
 
-        if (this.localTrailConfig === undefined) {
+        if (this.localTrialConfig === undefined) {
             throw new Error('trial config is not initialized');
         }
         const runScriptContent: string[] = [];
         if (process.platform !== 'win32') {
             runScriptContent.push('#!/bin/bash');
         }
-        runScriptContent.push(`cd ${this.localTrailConfig.codeDir}`);
+        runScriptContent.push(`cd ${this.localTrialConfig.codeDir}`);
         for (const variable of variables) {
             runScriptContent.push(setEnvironmentVariable(variable));
         }
-        const scripts: string[] = this.getScript(this.localTrailConfig, trialJobDetail.workingDirectory);
+        const scripts: string[] = this.getScript(this.localTrialConfig, trialJobDetail.workingDirectory);
         scripts.forEach((script: string) => {
             runScriptContent.push(script);
         });
diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
index c55c28427b..b9d42f6fc3 100644
--- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
@@ -511,12 +511,16 @@ class RemoteMachineTrainingService implements TrainingService {
         // tslint:disable-next-line: no-floating-promises
         SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics_collector.sh')}`, conn);
 
-        this.timer.subscribe(
+        const disposable = this.timer.subscribe(
             async (tick: number) => {
                 const cmdresult: RemoteCommandResult = await SSHClientUtility.remoteExeCommand(
                     `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
                 if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
                     rmMeta.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
+                    if (rmMeta.gpuSummary.gpuCount == 0) {
+                        this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
+                        this.timer.unsubscribe(disposable);
+                    }
                 }
             }
         );
diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py
index 7847a52a5c..41c1878486 100644
--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -56,6 +56,7 @@ def main(argv):
             parse_nvidia_smi_result(smi_output, metrics_output_dir)
         except FileNotFoundError:
             gen_empty_gpu_metric(metrics_output_dir)
+            break
         except:
             exception = sys.exc_info()
             for e in exception:

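The TypeScript hunks above consume the same one-JSON-record-per-line file that the Python collector appends to. A rough sketch of that contract from the reading side (the gpu_metrics file name and the gpuCount key come from the patches; the reader function itself is only illustrative):

    import json
    import os

    def read_latest_summary(metrics_output_dir):
        """Return the newest summary record, mirroring `tail -n 1 gpu_metrics`."""
        with open(os.path.join(metrics_output_dir, 'gpu_metrics')) as metrics_file:
            last_line = metrics_file.readlines()[-1]
        return json.loads(last_line)

    summary = read_latest_summary('.')
    if summary['gpuCount'] == 0:
        # corresponds to the "No GPU found on remote machine" warning in nni_manager
        print('No GPU available; stop polling this machine')
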
From 674f75b79a55636abfc975886c306a9e5056bdeb Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Thu, 8 Aug 2019 16:18:06 +0800
Subject: [PATCH 4/7] Fix minor issues

---
 examples/trials/mnist/config.yml                              | 2 +-
 .../remote_machine/remoteMachineTrainingService.ts            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/trials/mnist/config.yml b/examples/trials/mnist/config.yml
index 231c661ec3..794ca1cef6 100644
--- a/examples/trials/mnist/config.yml
+++ b/examples/trials/mnist/config.yml
@@ -18,4 +18,4 @@ tuner:
 trial:
   command: python3 mnist.py
   codeDir: .
-  gpuNum: 1
+  gpuNum: 0
diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
index b9d42f6fc3..35631f1ce9 100644
--- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
@@ -511,13 +511,13 @@ class RemoteMachineTrainingService implements TrainingService {
         // tslint:disable-next-line: no-floating-promises
         SSHClientUtility.remoteExeCommand(`bash ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics_collector.sh')}`, conn);
 
-        const disposable = this.timer.subscribe(
+        const disposable: Rx.IDisposable = this.timer.subscribe(
             async (tick: number) => {
                 const cmdresult: RemoteCommandResult = await SSHClientUtility.remoteExeCommand(
                     `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
                 if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
                     rmMeta.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
-                    if (rmMeta.gpuSummary.gpuCount == 0) {
+                    if (rmMeta.gpuSummary.gpuCount === 0) {
                         this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
                         this.timer.unsubscribe(disposable);
                     }

From f09f6ef99ce6a5a8554a54246e48155efe309f97 Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Fri, 9 Aug 2019 09:50:24 +0800
Subject: [PATCH 5/7] Catch other errors and fix bugs

---
 src/nni_manager/training_service/local/gpuScheduler.ts   | 2 +-
 .../remote_machine/remoteMachineTrainingService.ts       | 2 +-
 tools/nni_gpu_tool/gpu_metrics_collector.py              | 9 +++------
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/nni_manager/training_service/local/gpuScheduler.ts b/src/nni_manager/training_service/local/gpuScheduler.ts
index 03e8d91afa..017a2af38c 100644
--- a/src/nni_manager/training_service/local/gpuScheduler.ts
+++ b/src/nni_manager/training_service/local/gpuScheduler.ts
@@ -54,7 +54,7 @@ class GPUScheduler {
             } catch (error) {
                 this.log.error('Read GPU summary failed with error: ', error);
             }
-            if (this.gpuSummary.gpuCount === 0) {
+            if (this.gpuSummary !== undefined && this.gpuSummary.gpuCount === 0) {
                 throw new Error('GPU not available. Please check your CUDA configuration');
             }
             await delay(5000);
diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
index 35631f1ce9..26e172f6fe 100644
--- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
@@ -517,7 +517,7 @@ class RemoteMachineTrainingService implements TrainingService {
                     `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
                 if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
                     rmMeta.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
-                    if (rmMeta.gpuSummary.gpuCount === 0) {
+                    if (rmMeta.gpuSummary !== undefined && rmMeta.gpuSummary.gpuCount === 0) {
                         this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
                         this.timer.unsubscribe(disposable);
                     }
diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py
index 41c1878486..6b0681c5e4 100644
--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -53,14 +53,11 @@ def main(argv):
     while(True):
         try:
             smi_output = subprocess.check_output(cmd)
-            parse_nvidia_smi_result(smi_output, metrics_output_dir)
-        except FileNotFoundError:
+        except Exception:
+            traceback.print_exc()
             gen_empty_gpu_metric(metrics_output_dir)
             break
-        except:
-            exception = sys.exc_info()
-            for e in exception:
-                print("job exporter error {}".format(e))
+        parse_nvidia_smi_result(smi_output, metrics_output_dir)
         # TODO: change to sleep time configurable via arguments
         time.sleep(5)
 

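Taken together, the hunk above leaves the collection loop in roughly this shape (a sketch assuming the nni_gpu_tool package is importable; in the real file this body lives inside main()):

    import subprocess
    import time
    import traceback

    from nni_gpu_tool.gpu_metrics_collector import (
        gen_empty_gpu_metric, parse_nvidia_smi_result)

    def collect(metrics_output_dir, interval=5):
        cmd = 'nvidia-smi -q -x'.split()
        while True:
            try:
                smi_output = subprocess.check_output(cmd)
            except Exception:
                # any failure to run nvidia-smi: log it, write an empty record, stop
                traceback.print_exc()
                gen_empty_gpu_metric(metrics_output_dir)
                break
            parse_nvidia_smi_result(smi_output, metrics_output_dir)
            time.sleep(interval)
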
From ea0f70bb52f27287f09a9281e73637974353d70e Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Fri, 9 Aug 2019 10:32:55 +0800
Subject: [PATCH 6/7] Avoid using chmod

---
 tools/nni_gpu_tool/gpu_metrics_collector.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/nni_gpu_tool/gpu_metrics_collector.py b/tools/nni_gpu_tool/gpu_metrics_collector.py
index 6b0681c5e4..f58b9b2895 100644
--- a/tools/nni_gpu_tool/gpu_metrics_collector.py
+++ b/tools/nni_gpu_tool/gpu_metrics_collector.py
@@ -46,9 +46,6 @@ def main(argv):
     if check_ready_to_run() == False:
         # GPU metrics collector is already running. Exit
         exit(2)
-    with open(os.path.join(metrics_output_dir, "gpu_metrics"), "w") as outputFile:
-        pass
-    os.chmod(os.path.join(metrics_output_dir, "gpu_metrics"), 0o777)
     cmd = 'nvidia-smi -q -x'.split()
     while(True):
         try:
@@ -63,6 +60,7 @@ def main(argv):
 
 def parse_nvidia_smi_result(smi, outputDir):
     try:
+        old_umask = os.umask(0)
         xmldoc = minidom.parseString(smi)
         gpuList = xmldoc.getElementsByTagName('gpu')
         with open(os.path.join(outputDir, "gpu_metrics"), 'a') as outputFile:
@@ -86,9 +84,12 @@ def parse_nvidia_smi_result(smi, outputDir):
     except :
         e_info = sys.exc_info()
         print('xmldoc paring error')
+    finally:
+        os.umask(old_umask)
 
 def gen_empty_gpu_metric(outputDir):
     try:
+        old_umask = os.umask(0)
         with open(os.path.join(outputDir, "gpu_metrics"), 'a') as outputFile:
             outPut = {}
             outPut["Timestamp"] = time.asctime(time.localtime())
@@ -99,6 +100,8 @@ def gen_empty_gpu_metric(outputDir):
             outputFile.flush()
     except Exception:
         traceback.print_exc()
+    finally:
+        os.umask(old_umask)
 
 
 if __name__ == "__main__":

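For reference, the umask trick introduced above is one standard way to control the mode of a file created by open() without a follow-up chmod: clearing the umask lets the append-mode open() create gpu_metrics with 0o666, so other users can read and write it. A small standalone sketch of the pattern (the file name here is just a placeholder):

    import os

    old_umask = os.umask(0)          # clear the process umask, remember the old one
    try:
        # open() creates missing files with mode 0o666 & ~umask, so with the
        # umask cleared the file ends up world-readable and writable.
        with open('gpu_metrics', 'a') as output_file:
            output_file.write('{"gpuCount": 0, "gpuInfos": []}\n')
    finally:
        os.umask(old_umask)          # always restore the previous umask
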
From d0de4bd9f6d9868df024011101eb476bd23c5583 Mon Sep 17 00:00:00 2001
From: liuzhe <zhe.liu@microsoft.com>
Date: Fri, 9 Aug 2019 10:40:44 +0800
Subject: [PATCH 7/7] Minor fix: drop the redundant gpuSummary undefined check

---
 .../remote_machine/remoteMachineTrainingService.ts              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
index 26e172f6fe..35631f1ce9 100644
--- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
+++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
@@ -517,7 +517,7 @@ class RemoteMachineTrainingService implements TrainingService {
                     `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn);
                 if (cmdresult !== undefined && cmdresult.stdout !== undefined) {
                     rmMeta.gpuSummary = <GPUSummary>JSON.parse(cmdresult.stdout);
-                    if (rmMeta.gpuSummary !== undefined && rmMeta.gpuSummary.gpuCount === 0) {
+                    if (rmMeta.gpuSummary.gpuCount === 0) {
                         this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`);
                         this.timer.unsubscribe(disposable);
                     }