@@ -90,9 +90,9 @@ public void testBasicRunJobForDistributedTraining() throws Exception {
9090 new String [] { "--name" , "my-job" , "--docker_image" , "tf-docker:1.1.0" ,
9191 "--input_path" , "hdfs://input" , "--checkpoint_path" , "hdfs://output" ,
9292 "--num_workers" , "3" , "--num_ps" , "2" , "--worker_launch_cmd" ,
93- "python run-job.py" , "--worker_resources" , "memory=2048M,vcores=2" ,
93+ "ambari- python-wrap run-job.py" , "--worker_resources" , "memory=2048M,vcores=2" ,
9494 "--ps_resources" , "memory=4G,vcores=4" , "--tensorboard" , "true" ,
95- "--ps_launch_cmd" , "python run-ps.py" , "--keytab" , "/keytab/path" ,
95+ "--ps_launch_cmd" , "ambari- python-wrap run-ps.py" , "--keytab" , "/keytab/path" ,
9696 "--principal" , "user/_HOST@domain.com" , "--distribute_keytab" ,
9797 "--verbose" });
9898
@@ -101,11 +101,11 @@ public void testBasicRunJobForDistributedTraining() throws Exception {
101101 Assert .assertEquals (jobRunParameters .getInputPath (), "hdfs://input" );
102102 Assert .assertEquals (jobRunParameters .getCheckpointPath (), "hdfs://output" );
103103 Assert .assertEquals (jobRunParameters .getNumPS (), 2 );
104- Assert .assertEquals (jobRunParameters .getPSLaunchCmd (), "python run-ps.py" );
104+ Assert .assertEquals (jobRunParameters .getPSLaunchCmd (), "ambari- python-wrap run-ps.py" );
105105 Assert .assertEquals (Resources .createResource (4096 , 4 ),
106106 jobRunParameters .getPsResource ());
107107 Assert .assertEquals (jobRunParameters .getWorkerLaunchCmd (),
108- "python run-job.py" );
108+ "ambari- python-wrap run-job.py" );
109109 Assert .assertEquals (Resources .createResource (2048 , 2 ),
110110 jobRunParameters .getWorkerResource ());
111111 Assert .assertEquals (jobRunParameters .getDockerImageName (),
@@ -126,7 +126,7 @@ public void testBasicRunJobForSingleNodeTraining() throws Exception {
126126 runJobCli .run (
127127 new String [] { "--name" , "my-job" , "--docker_image" , "tf-docker:1.1.0" ,
128128 "--input_path" , "hdfs://input" , "--checkpoint_path" , "hdfs://output" ,
129- "--num_workers" , "1" , "--worker_launch_cmd" , "python run-job.py" ,
129+ "--num_workers" , "1" , "--worker_launch_cmd" , "ambari- python-wrap run-job.py" ,
130130 "--worker_resources" , "memory=4g,vcores=2" , "--tensorboard" ,
131131 "true" , "--verbose" , "--wait_job_finish" });
132132
@@ -136,7 +136,7 @@ public void testBasicRunJobForSingleNodeTraining() throws Exception {
136136 Assert .assertEquals (jobRunParameters .getCheckpointPath (), "hdfs://output" );
137137 Assert .assertEquals (jobRunParameters .getNumWorkers (), 1 );
138138 Assert .assertEquals (jobRunParameters .getWorkerLaunchCmd (),
139- "python run-job.py" );
139+ "ambari- python-wrap run-job.py" );
140140 Assert .assertEquals (Resources .createResource (4096 , 2 ),
141141 jobRunParameters .getWorkerResource ());
142142 Assert .assertTrue (SubmarineLogs .isVerbose ());
@@ -152,7 +152,7 @@ public void testNoInputPathOptionSpecified() throws Exception {
152152 runJobCli .run (
153153 new String []{"--name" , "my-job" , "--docker_image" , "tf-docker:1.1.0" ,
154154 "--checkpoint_path" , "hdfs://output" ,
155- "--num_workers" , "1" , "--worker_launch_cmd" , "python run-job.py" ,
155+ "--num_workers" , "1" , "--worker_launch_cmd" , "ambari- python-wrap run-job.py" ,
156156 "--worker_resources" , "memory=4g,vcores=2" , "--tensorboard" ,
157157 "true" , "--verbose" , "--wait_job_finish" });
158158 } catch (ParseException e ) {
@@ -190,14 +190,14 @@ public void testLaunchCommandPatternReplace() throws Exception {
190190 new String [] { "--name" , "my-job" , "--docker_image" , "tf-docker:1.1.0" ,
191191 "--input_path" , "hdfs://input" , "--checkpoint_path" , "hdfs://output" ,
192192 "--num_workers" , "3" , "--num_ps" , "2" , "--worker_launch_cmd" ,
193- "python run-job.py --input=%input_path% --model_dir=%checkpoint_path% --export_dir=%saved_model_path%/savedmodel" ,
193+ "ambari- python-wrap run-job.py --input=%input_path% --model_dir=%checkpoint_path% --export_dir=%saved_model_path%/savedmodel" ,
194194 "--worker_resources" , "memory=2048,vcores=2" , "--ps_resources" ,
195195 "memory=4096,vcores=4" , "--tensorboard" , "true" , "--ps_launch_cmd" ,
196- "python run-ps.py --input=%input_path% --model_dir=%checkpoint_path%/model" ,
196+ "ambari- python-wrap run-ps.py --input=%input_path% --model_dir=%checkpoint_path%/model" ,
197197 "--verbose" });
198198
199199 Assert .assertEquals (
200- "python run-job.py --input=hdfs://input --model_dir=hdfs://output "
200+ "ambari- python-wrap run-job.py --input=hdfs://input --model_dir=hdfs://output "
201201 + "--export_dir=hdfs://output/savedmodel" ,
202202 runJobCli .getRunJobParameters ().getWorkerLaunchCmd ());
203203 Assert .assertEquals (
0 commit comments